• Main Page
  • Related Pages
  • Modules
  • Data Structures
  • Files
  • Examples
  • File List
  • Globals

libavcodec/h264.c

Go to the documentation of this file.
00001 /*
00002  * H.26L/H.264/AVC/JVT/14496-10/... decoder
00003  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
00004  *
00005  * This file is part of Libav.
00006  *
00007  * Libav is free software; you can redistribute it and/or
00008  * modify it under the terms of the GNU Lesser General Public
00009  * License as published by the Free Software Foundation; either
00010  * version 2.1 of the License, or (at your option) any later version.
00011  *
00012  * Libav is distributed in the hope that it will be useful,
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * Lesser General Public License for more details.
00016  *
00017  * You should have received a copy of the GNU Lesser General Public
00018  * License along with Libav; if not, write to the Free Software
00019  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00020  */
00021 
00028 #include "libavutil/imgutils.h"
00029 #include "internal.h"
00030 #include "cabac.h"
00031 #include "cabac_functions.h"
00032 #include "dsputil.h"
00033 #include "avcodec.h"
00034 #include "mpegvideo.h"
00035 #include "h264.h"
00036 #include "h264data.h"
00037 #include "h264_mvpred.h"
00038 #include "golomb.h"
00039 #include "mathops.h"
00040 #include "rectangle.h"
00041 #include "thread.h"
00042 #include "vdpau_internal.h"
00043 #include "libavutil/avassert.h"
00044 
00045 //#undef NDEBUG
00046 #include <assert.h>
00047 
/* Per-QP lookup tables covering the whole legal QP range (0..QP_MAX_NUM):
 * rem6[qp] == qp % 6 and div6[qp] == qp / 6, precomputed so the hot
 * dequant paths avoid integer division. */
static const uint8_t rem6[QP_MAX_NUM+1]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};

static const uint8_t div6[QP_MAX_NUM+1]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,10,10,10,10,
};
00055 
/* Pixel formats offered for full-range (JPEG) 4:2:0 H.264 streams:
 * hardware-accelerated formats first, software YUVJ420P as fallback,
 * terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD,
    PIX_FMT_VAAPI_VLD,
    PIX_FMT_VDA_VLD,
    PIX_FMT_YUVJ420P,
    PIX_FMT_NONE
};
00063 
00068 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
00069     MpegEncContext * const s = &h->s;
00070     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
00071     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
00072     int i;
00073 
00074     if(!(h->top_samples_available&0x8000)){
00075         for(i=0; i<4; i++){
00076             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
00077             if(status<0){
00078                 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00079                 return -1;
00080             } else if(status){
00081                 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
00082             }
00083         }
00084     }
00085 
00086     if((h->left_samples_available&0x8888)!=0x8888){
00087         static const int mask[4]={0x8000,0x2000,0x80,0x20};
00088         for(i=0; i<4; i++){
00089             if(!(h->left_samples_available&mask[i])){
00090                 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
00091                 if(status<0){
00092                     av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
00093                     return -1;
00094                 } else if(status){
00095                     h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
00096                 }
00097             }
00098         }
00099     }
00100 
00101     return 0;
00102 } //FIXME cleanup like ff_h264_check_intra_pred_mode
00103 
/**
 * Check whether the neighbours required by the requested 16x16/chroma intra
 * prediction mode are available, remapping DC modes to a variant that only
 * uses the available blocks.
 * @param mode      requested prediction mode (validated to be <= 6)
 * @param is_chroma nonzero when checking a chroma prediction mode
 * @return the (possibly remapped) mode, or -1 if the mode is impossible
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode, int is_chroma){
    MpegEncContext * const s = &h->s;
    /* Indexed by mode: -1 = impossible without that neighbour,
     * otherwise the replacement mode. */
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

    /* unsigned compare also rejects negative modes */
    if(mode > 6U) {
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
        return -1;
    }

    if(!(h->top_samples_available&0x8000)){
        mode= top[ mode ];
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    if((h->left_samples_available&0x8080) != 0x8080){
        mode= left[ mode ];
        if(is_chroma && (h->left_samples_available&0x8080)){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
            /* Only one of the two left fields is available: pick the
             * half-left DC variant matching which field is missing. */
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
        }
        if(mode<0){
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
            return -1;
        }
    }

    return mode;
}
00139 
/**
 * Decode a NAL unit header and unescape its payload.
 * Parses nal_ref_idc / nal_unit_type from the first byte, then removes the
 * 00 00 03 emulation-prevention sequences, copying into an internal
 * rbsp_buffer only when at least one escape is present.
 * @param src        NAL unit, starting with the header byte
 * @param dst_length set to the length of the unescaped payload
 * @param consumed   set to the number of bytes consumed from src
 * @param length     number of bytes available in src
 * @return pointer to the unescaped data (src itself when no escapes were
 *         found), or NULL on allocation failure
 */
const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
    int i, si, di;
    uint8_t *dst;
    int bufidx;

//    src[0]&0x80;                //forbidden bit
    h->nal_ref_idc= src[0]>>5;
    h->nal_unit_type= src[0]&0x1F;

    src++; length--;

    /* Fast scan for a zero byte (candidate escape/startcode position);
     * the wide loads skip runs with no zero bytes, RS rewinds the
     * overshoot of the stride. */
#if HAVE_FAST_UNALIGNED
# if HAVE_FAST_64BIT
#   define RS 7
    for(i=0; i+1<length; i+=9){
        if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
#   define RS 3
    for(i=0; i+1<length; i+=5){
        if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
            continue;
        if(i>0 && !src[i]) i--;
        while(src[i]) i++;
#else
#   define RS 0
    for(i=0; i+1<length; i+=2){
        if(src[i]) continue;
        if(i>0 && src[i-1]==0) i--;
#endif
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
            if(src[i+2]!=3){
                /* startcode, so we must be past the end */
                length=i;
            }
            break;
        }
        i-= RS;
    }

    if(i>=length-1){ //no escaped 0
        *dst_length= length;
        *consumed= length+1; //+1 for the header
        return src;
    }

    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
    av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
    dst= h->rbsp_buffer[bufidx];

    if (dst == NULL){
        return NULL;
    }

//printf("decoding esc\n");
    /* Copy the escape-free prefix verbatim, then filter the rest. */
    memcpy(dst, src, i);
    si=di=i;
    while(si+2<length){
        //remove escapes (very rare 1:2^22)
        if(src[si+2]>3){
            dst[di++]= src[si++];
            dst[di++]= src[si++];
        }else if(src[si]==0 && src[si+1]==0){
            if(src[si+2]==3){ //escape
                dst[di++]= 0;
                dst[di++]= 0;
                si+=3;
                continue;
            }else //next start code
                goto nsc;
        }

        dst[di++]= src[si++];
    }
    while(si<length)
        dst[di++]= src[si++];
nsc:

    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

    *dst_length= di;
    *consumed= si + 1;//+1 for the header
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
    return dst;
}
00225 
00230 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
00231     int v= *src;
00232     int r;
00233 
00234     tprintf(h->s.avctx, "rbsp trailing %X\n", v);
00235 
00236     for(r=1; r<9; r++){
00237         if(v&1) return r;
00238         v>>=1;
00239     }
00240     return 0;
00241 }
00242 
00243 static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,
00244                                  int y_offset, int list){
00245     int raw_my= h->mv_cache[list][ scan8[n] ][1];
00246     int filter_height= (raw_my&3) ? 2 : 0;
00247     int full_my= (raw_my>>2) + y_offset;
00248     int top = full_my - filter_height, bottom = full_my + height + filter_height;
00249 
00250     return FFMAX(abs(top), bottom);
00251 }
00252 
00253 static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,
00254                                int y_offset, int list0, int list1, int *nrefs){
00255     MpegEncContext * const s = &h->s;
00256     int my;
00257 
00258     y_offset += 16*(s->mb_y >> MB_FIELD);
00259 
00260     if(list0){
00261         int ref_n = h->ref_cache[0][ scan8[n] ];
00262         Picture *ref= &h->ref_list[0][ref_n];
00263 
00264         // Error resilience puts the current picture in the ref list.
00265         // Don't try to wait on these as it will cause a deadlock.
00266         // Fields can wait on each other, though.
00267         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00268            (ref->f.reference & 3) != s->picture_structure) {
00269             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);
00270             if (refs[0][ref_n] < 0) nrefs[0] += 1;
00271             refs[0][ref_n] = FFMAX(refs[0][ref_n], my);
00272         }
00273     }
00274 
00275     if(list1){
00276         int ref_n = h->ref_cache[1][ scan8[n] ];
00277         Picture *ref= &h->ref_list[1][ref_n];
00278 
00279         if (ref->f.thread_opaque != s->current_picture.f.thread_opaque ||
00280            (ref->f.reference & 3) != s->picture_structure) {
00281             my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);
00282             if (refs[1][ref_n] < 0) nrefs[1] += 1;
00283             refs[1][ref_n] = FFMAX(refs[1][ref_n], my);
00284         }
00285     }
00286 }
00287 
/**
 * Wait until all reference-frame rows that motion compensation of the
 * current macroblock may read have been decoded (frame-threading sync).
 * Collects the lowest needed row per list/reference, then blocks on each
 * via ff_thread_await_progress().
 */
static void await_references(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    int refs[2][48];
    int nrefs[2] = {0};
    int ref, list;

    memset(refs, -1, sizeof(refs));

    /* Walk the partition tree and record per-reference lowest rows. */
    if(IS_16X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
    }else if(IS_16X8(mb_type)){
        get_lowest_part_y(h, refs, 0, 8, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 8, 8, 8,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else if(IS_8X16(mb_type)){
        get_lowest_part_y(h, refs, 0, 16, 0,
                  IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);
        get_lowest_part_y(h, refs, 4, 16, 0,
                  IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;
            int y_offset= (i&2)<<2;

            if(IS_SUB_8X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_8X4(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 4, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+2, 4, y_offset+4,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else if(IS_SUB_4X8(sub_mb_type)){
                get_lowest_part_y(h, refs, n  , 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                get_lowest_part_y(h, refs, n+1, 8, y_offset,
                          IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_y_offset= y_offset + 2*(j&2);
                    get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,
                              IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);
                }
            }
        }
    }

    /* Now block until every needed reference row has been reported. */
    for(list=h->list_count-1; list>=0; list--){
        for(ref=0; ref<48 && nrefs[list]; ref++){
            int row = refs[list][ref];
            if(row >= 0){
                Picture *ref_pic = &h->ref_list[list][ref];
                int ref_field = ref_pic->f.reference - 1;
                int ref_field_picture = ref_pic->field_picture;
                int pic_height = 16*s->mb_height >> ref_field_picture;

                row <<= MB_MBAFF;
                nrefs[list]--;

                if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1)           , pic_height-1), 0);
                }else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field    , pic_height-1), 0);
                }else if(FIELD_PICTURE){
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);
                }else{
                    ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);
                }
            }
        }
    }
}
00377 
#if 0
/* Dead code, kept for reference only: forward luma DC transform.
 * NOTE(review): references a 'stride' that is not defined in this scope —
 * it would not compile if re-enabled as-is. */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif
00417 
00418 #undef xStride
00419 #undef stride
00420 
#if 0
/* Dead code, kept for reference only: forward 2x2 chroma DC transform
 * (butterfly over the four DC coefficients). */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif
00443 
/**
 * Motion-compensate one direction (one list) of a single partition:
 * quarter-pel luma interpolation plus chroma interpolation, with edge
 * emulation when the motion vector points (partly) outside the picture.
 * @param square     nonzero when one qpix_op call covers the whole part
 * @param delta      offset to the second half when !square
 * @param chroma_idc 1 = yuv420, 2 = yuv422, 3 = yuv444
 */
static av_always_inline void
mc_dir_part(H264Context *h, Picture *pic, int n, int square,
            int height, int delta, int list,
            uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
            int src_x_offset, int src_y_offset,
            qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,
            int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
    const int luma_xy= (mx&3) + ((my&3)<<2);
    int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;
    uint8_t * src_y = pic->f.data[0] + offset;
    uint8_t * src_cb, * src_cr;
    int extra_width= h->emu_edge_width;
    int extra_height= h->emu_edge_height;
    int emu=0;
    const int full_mx= mx>>2;
    const int full_my= my>>2;
    const int pic_width  = 16*s->mb_width;
    const int pic_height = 16*s->mb_height >> MB_FIELD;
    int ysh;

    /* fractional mvs need the interpolation filter's margin */
    if(mx&7) extra_width -= 3;
    if(my&7) extra_height -= 3;

    /* Source area (partly) outside the picture: interpolate from a
     * padded copy in the edge emulation buffer instead. */
    if(   full_mx < 0-extra_width
       || full_my < 0-extra_height
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        emu=1;
    }

    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
    if(!square){
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
    }

    if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

    if(chroma_idc == 3 /* yuv444 */){
        /* chroma planes share the luma geometry, use the luma filters */
        src_cb = pic->f.data[1] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);
        }

        src_cr = pic->f.data[2] + offset;
        if(emu){
            s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,
                                    16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
            src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;
        }
        qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?
        if(!square){
            qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);
        }
        return;
    }

    /* vertical chroma shift: 3 for 4:2:0, 2 for 4:2:2 */
    ysh = 3 - (chroma_idc == 2 /* yuv422 */);
    if(chroma_idc == 1 /* yuv420 */ && MB_FIELD){
        // chroma offset when predicting from a field of opposite parity
        my += 2 * ((s->mb_y & 1) - (pic->f.reference - 1));
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
    }

    src_cb = pic->f.data[1] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;
    src_cr = pic->f.data[2] + ((mx >> 3) << pixel_shift) + (my >> ysh) * h->mb_uvlinesize;

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cb= s->edge_emu_buffer;
    }
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);

    if(emu){
        s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize,
                                9, 8 * chroma_idc + 1, (mx >> 3), (my >> ysh),
                                pic_width >> 1, pic_height >> (chroma_idc == 1 /* yuv420 */));
            src_cr= s->edge_emu_buffer;
    }
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, height >> (chroma_idc == 1 /* yuv420 */),
              mx&7, (my << (chroma_idc == 2 /* yuv422 */)) &7);
}
00541 
00542 static av_always_inline void
00543 mc_part_std(H264Context *h, int n, int square, int height, int delta,
00544             uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00545             int x_offset, int y_offset,
00546             qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00547             qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00548             int list0, int list1, int pixel_shift, int chroma_idc)
00549 {
00550     MpegEncContext * const s = &h->s;
00551     qpel_mc_func *qpix_op=  qpix_put;
00552     h264_chroma_mc_func chroma_op= chroma_put;
00553 
00554     dest_y  += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00555     if (chroma_idc == 3 /* yuv444 */) {
00556         dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00557         dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
00558     } else if (chroma_idc == 2 /* yuv422 */) {
00559         dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
00560         dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
00561     } else /* yuv420 */ {
00562         dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00563         dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
00564     }
00565     x_offset += 8*s->mb_x;
00566     y_offset += 8*(s->mb_y >> MB_FIELD);
00567 
00568     if(list0){
00569         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
00570         mc_dir_part(h, ref, n, square, height, delta, 0,
00571                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00572                            qpix_op, chroma_op, pixel_shift, chroma_idc);
00573 
00574         qpix_op=  qpix_avg;
00575         chroma_op= chroma_avg;
00576     }
00577 
00578     if(list1){
00579         Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
00580         mc_dir_part(h, ref, n, square, height, delta, 1,
00581                            dest_y, dest_cb, dest_cr, x_offset, y_offset,
00582                            qpix_op, chroma_op, pixel_shift, chroma_idc);
00583     }
00584 }
00585 
/**
 * Weighted motion compensation of one partition.
 * Bi-directional predictions are computed into a scratchpad and combined
 * with implicit (use_weight == 2) or explicit weights; uni-directional
 * predictions are weighted in place.
 * @param chroma_idc 1 = yuv420, 2 = yuv422, 3 = yuv444
 */
static av_always_inline void
mc_part_weighted(H264Context *h, int n, int square, int height, int delta,
                 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                 int x_offset, int y_offset,
                 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
                 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
                 int list0, int list1, int pixel_shift, int chroma_idc){
    MpegEncContext * const s = &h->s;
    int chroma_height;

    dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    if (chroma_idc == 3 /* yuv444 */) {
        /* chroma planes share the luma geometry and weight functions */
        chroma_height = height;
        chroma_weight_avg = luma_weight_avg;
        chroma_weight_op = luma_weight_op;
        dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
        dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;
    } else if (chroma_idc == 2 /* yuv422 */) {
        chroma_height = height;
        dest_cb += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) + 2*y_offset*h->mb_uvlinesize;
    } else /* yuv420 */ {
        chroma_height = height >> 1;
        dest_cb += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
        dest_cr += (  x_offset << pixel_shift) +   y_offset*h->mb_uvlinesize;
    }
    x_offset += 8*s->mb_x;
    y_offset += 8*(s->mb_y >> MB_FIELD);

    if(list0 && list1){
        /* don't optimize for luma-only case, since B-frames usually
         * use implicit weights => chroma too. */
        uint8_t *tmp_cb = s->obmc_scratchpad;
        uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);
        uint8_t *tmp_y  = s->obmc_scratchpad + 16*h->mb_uvlinesize;
        int refn0 = h->ref_cache[0][ scan8[n] ];
        int refn1 = h->ref_cache[1][ scan8[n] ];

        /* list 0 into the destination, list 1 into the scratchpad */
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, height, delta, 0,
                    dest_y, dest_cb, dest_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, height, delta, 1,
                    tmp_y, tmp_cb, tmp_cr,
                    x_offset, y_offset, qpix_put, chroma_put,
                    pixel_shift, chroma_idc);

        if(h->use_weight == 2){
            /* implicit weights: weight pair sums to 64, denom is 5 */
            int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
            int weight1 = 64 - weight0;
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize,
                              height,        5, weight0, weight1, 0);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize,
                              chroma_height, 5, weight0, weight1, 0);
        }else{
            /* explicit weights/offsets from the slice header tables */
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                            h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
                            h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
                            h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                            h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
                            h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
        }
    }else{
        /* uni-directional: predict, then apply explicit weight in place */
        int list = list1 ? 1 : 0;
        int refn = h->ref_cache[list][ scan8[n] ];
        Picture *ref= &h->ref_list[list][refn];
        mc_dir_part(h, ref, n, square, height, delta, list,
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put, chroma_put, pixel_shift, chroma_idc);

        luma_weight_op(dest_y, h->mb_linesize, height, h->luma_log2_weight_denom,
                       h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
        if(h->use_weight_chroma){
            chroma_weight_op(dest_cb, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
            chroma_weight_op(dest_cr, h->mb_uvlinesize, chroma_height, h->chroma_log2_weight_denom,
                             h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
        }
    }
}
00672 
00673 static av_always_inline void
00674 mc_part(H264Context *h, int n, int square, int height, int delta,
00675         uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00676         int x_offset, int y_offset,
00677         qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
00678         qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
00679         h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00680         int list0, int list1, int pixel_shift, int chroma_idc)
00681 {
00682     if((h->use_weight==2 && list0 && list1
00683         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
00684        || h->use_weight==1)
00685         mc_part_weighted(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00686                          x_offset, y_offset, qpix_put, chroma_put,
00687                          weight_op[0], weight_op[1], weight_avg[0],
00688                          weight_avg[1], list0, list1, pixel_shift, chroma_idc);
00689     else
00690         mc_part_std(h, n, square, height, delta, dest_y, dest_cb, dest_cr,
00691                     x_offset, y_offset, qpix_put, chroma_put, qpix_avg,
00692                     chroma_avg, list0, list1, pixel_shift, chroma_idc);
00693 }
00694 
/**
 * Prefetch reference-picture pixels for the macroblock that the current
 * motion vector suggests will be needed a few macroblocks ahead, so the
 * data is already in cache when motion compensation reads it.
 *
 * @param list        reference list (0 or 1) whose first cached MV/ref is used
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit (bytes per sample - 1)
 * @param chroma_idc  chroma format (1=4:2:0, 2=4:2:2, 3=4:4:4)
 */
static av_always_inline void
prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma_idc)
{
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    if(refn >= 0){
        /* full-pel position of the MB's first block MV, plus a look-ahead bias */
        const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
        const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
        uint8_t **src = h->ref_list[list][refn].f.data;
        int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        if (chroma_idc == 3 /* yuv444 */) {
            /* chroma planes have luma dimensions: reuse the luma offset */
            s->dsp.prefetch(src[1]+off, s->linesize, 4);
            s->dsp.prefetch(src[2]+off, s->linesize, 4);
        }else{
            /* subsampled chroma: halve the position, use the chroma stride */
            off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);
            s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
        }
    }
}
00717 
/**
 * Perform inter motion compensation for one macroblock, dispatching on its
 * partitioning: 16x16, 16x8, 8x16, or 8x8 with per-partition sub-splits
 * (8x8, 8x4, 4x8, 4x4).
 *
 * The qpix/chroma put/avg function tables are indexed by block-size level
 * (0=16-wide, 1=8-wide, 2=4-wide); weight_op/weight_avg likewise.  mc_part()
 * chooses weighted vs. standard prediction per partition.
 *
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit
 * @param chroma_idc  chroma format (1=4:2:0, 2=4:2:2, 3=4:4:4)
 */
static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                      int pixel_shift, int chroma_idc)
{
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];

    assert(IS_INTER(mb_type));

    /* with frame threading, wait until the referenced rows are decoded */
    if(HAVE_THREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))
        await_references(h);
    prefetch_motion(h, 0, pixel_shift, chroma_idc);

    if(IS_16X16(mb_type)){
        mc_part(h, 0, 1, 16, 0, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
    }else if(IS_16X8(mb_type)){
        /* top and bottom 16x8 halves, each with its own prediction direction */
        mc_part(h, 0, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 8, 0, 8, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
                weight_op, weight_avg,
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else if(IS_8X16(mb_type)){
        /* left and right 8x16 halves */
        mc_part(h, 0, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),
                pixel_shift, chroma_idc);
        mc_part(h, 4, 0, 16, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                &weight_op[1], &weight_avg[1],
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),
                pixel_shift, chroma_idc);
    }else{
        int i;

        assert(IS_8X8(mb_type));

        /* four 8x8 partitions, each possibly sub-divided */
        for(i=0; i<4; i++){
            const int sub_mb_type= h->sub_mb_type[i];
            const int n= 4*i;                 /* first 4x4 block index of this 8x8 */
            int x_offset= (i&1)<<2;
            int y_offset= (i&2)<<1;

            if(IS_SUB_8X8(sub_mb_type)){
                mc_part(h, n, 1, 8, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_8X4(sub_mb_type)){
                mc_part(h, n  , 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+2, 0, 4, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
                    &weight_op[1], &weight_avg[1],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else if(IS_SUB_4X8(sub_mb_type)){
                mc_part(h, n  , 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
                mc_part(h, n+1, 0, 8, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                    &weight_op[2], &weight_avg[2],
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                    pixel_shift, chroma_idc);
            }else{
                int j;
                assert(IS_SUB_4X4(sub_mb_type));
                for(j=0; j<4; j++){
                    int sub_x_offset= x_offset + 2*(j&1);
                    int sub_y_offset= y_offset +   (j&2);
                    mc_part(h, n+j, 1, 4, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
                        &weight_op[2], &weight_avg[2],
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),
                        pixel_shift, chroma_idc);
                }
            }
        }
    }

    prefetch_motion(h, 1, pixel_shift, chroma_idc);
}
00819 
00820 static av_always_inline void
00821 hl_motion_420(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00822               qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00823               qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00824               h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00825               int pixel_shift)
00826 {
00827     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00828               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 1);
00829 }
00830 
00831 static av_always_inline void
00832 hl_motion_422(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
00833               qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
00834               qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
00835               h264_weight_func *weight_op, h264_biweight_func *weight_avg,
00836               int pixel_shift)
00837 {
00838     hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put,
00839               qpix_avg, chroma_avg, weight_op, weight_avg, pixel_shift, 2);
00840 }
00841 
/**
 * Free every per-context table allocated by ff_h264_alloc_tables() plus the
 * per-thread buffers of each slice thread context.
 *
 * @param free_rbsp if non-zero, also release the RBSP parsing buffers
 *                  (pass 0 to keep them for reuse across a reinit)
 */
static void free_tables(H264Context *h, int free_rbsp){
    int i;
    H264Context *hx;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;  /* pointed into slice_table_base, now freed */
    av_freep(&h->list_counts);

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2br_xy);

    for(i = 0; i < MAX_THREADS; i++) {
        hx = h->thread_context[i];
        if(!hx) continue;
        av_freep(&hx->top_borders[1]);
        av_freep(&hx->top_borders[0]);
        av_freep(&hx->s.obmc_scratchpad);
        if (free_rbsp){
            av_freep(&hx->rbsp_buffer[1]);
            av_freep(&hx->rbsp_buffer[0]);
            hx->rbsp_buffer_size[0] = 0;
            hx->rbsp_buffer_size[1] = 0;
        }
        /* slot 0 is h itself; only the extra slice contexts are heap-owned */
        if (i) av_freep(&h->thread_context[i]);
    }
}
00875 
/**
 * Build the 8x8 dequantization tables (one per QP, per scaling list).
 * Scaling lists whose PPS matrices are byte-identical share a single
 * buffer instead of being recomputed.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int i,j,q,x;
    /* QP range grows by 6 per extra bit of luma depth */
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

    for(i=0; i<6; i++ ){
        h->dequant8_coeff[i] = h->dequant8_buffer[i];
        /* reuse an earlier list's buffer if its scaling matrix matches */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){
                h->dequant8_coeff[i] = h->dequant8_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;  /* shared with list j, nothing to compute */

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q];
            int idx = rem6[q];
            for(x=0; x<64; x++)
                /* (x>>3)|((x&7)<<3) transposes the 8x8 index; the init table
                 * is addressed via its own 4x4 scan of the (x>>1, x) bits */
                h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
                    ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
                    h->pps.scaling_matrix8[i][x]) << shift;
        }
    }
}
00901 
/**
 * Build the 4x4 dequantization tables (one per QP, per scaling list).
 * As with the 8x8 variant, lists with identical PPS scaling matrices
 * share one buffer.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int i,j,q,x;
    /* QP range grows by 6 per extra bit of luma depth */
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    for(i=0; i<6; i++ ){
        h->dequant4_coeff[i] = h->dequant4_buffer[i];
        /* reuse an earlier list's buffer if its scaling matrix matches */
        for(j=0; j<i; j++){
            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
                h->dequant4_coeff[i] = h->dequant4_buffer[j];
                break;
            }
        }
        if(j<i)
            continue;  /* shared with list j, nothing to compute */

        for(q=0; q<max_qp+1; q++){
            int shift = div6[q] + 2;
            int idx = rem6[q];
            for(x=0; x<16; x++)
                /* (x>>2)|((x<<2)&0xF) transposes the 4x4 index */
                h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
                    h->pps.scaling_matrix4[i][x]) << shift;
        }
    }
}
00926 
00927 static void init_dequant_tables(H264Context *h){
00928     int i,x;
00929     init_dequant4_coeff_table(h);
00930     if(h->pps.transform_8x8_mode)
00931         init_dequant8_coeff_table(h);
00932     if(h->sps.transform_bypass){
00933         for(i=0; i<6; i++)
00934             for(x=0; x<16; x++)
00935                 h->dequant4_coeff[i][0][x] = 1<<6;
00936         if(h->pps.transform_8x8_mode)
00937             for(i=0; i<6; i++)
00938                 for(x=0; x<64; x++)
00939                     h->dequant8_coeff[i][0][x] = 1<<6;
00940     }
00941 }
00942 
00943 
/**
 * Allocate all per-context decoding tables (prediction modes, CBP,
 * MVD, slice map, block-index lookups) sized from the current
 * mb_width/mb_height/mb_stride, and build the mb->block index maps.
 *
 * On failure everything already allocated is released via free_tables().
 * @return 0 on success, -1 on allocation failure
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* one extra MB row of slack below the picture */
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
    /* per-row tables: two rows (MBAFF pairs) per slice thread */
    const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
    int x,y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8  * sizeof(uint8_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count    , big_mb_num * 48 * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)

    /* -1 marks "no slice"; slice_table is offset so neighbors above/left of
     * MB (0,0) index into valid (sentinel) memory */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy  , big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            const int mb_xy= x + y*s->mb_stride;
            const int b_xy = 4*x + 4*y*h->b_stride;

            h->mb2b_xy [mb_xy]= b_xy;
            /* without FMO the per-row MVD tables wrap every two MB rows */
            h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    /* allocated lazily in ff_h264_frame_start(), once linesize is known */
    s->obmc_scratchpad = NULL;

    if(!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;
fail:
    free_tables(h, 1);
    return -1;
}
00987 
/**
 * Point slice-thread context @p dst at the tables owned by @p src.
 * Shared (whole-picture) tables are aliased directly; per-row tables
 * (intra4x4_pred_mode, mvd_table) are offset by slice-thread index @p i
 * so each thread writes its own two MB rows.  dst owns none of these
 * pointers and must not free them.
 */
static void clone_tables(H264Context *dst, H264Context *src, int i){
    MpegEncContext * const s = &src->s;
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
    dst->non_zero_count           = src->non_zero_count;
    dst->slice_table              = src->slice_table;
    dst->cbp_table                = src->cbp_table;
    dst->mb2b_xy                  = src->mb2b_xy;
    dst->mb2br_xy                 = src->mb2br_xy;
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
    dst->mvd_table[0]             = src->mvd_table[0] + i*8*2*s->mb_stride;
    dst->mvd_table[1]             = src->mvd_table[1] + i*8*2*s->mb_stride;
    dst->direct_table             = src->direct_table;
    dst->list_counts              = src->list_counts;

    /* allocated lazily in ff_h264_frame_start() */
    dst->s.obmc_scratchpad = NULL;
    ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc);
}
01008 
/**
 * Allocate the buffers that are private to each slice-thread context
 * (deblocking top borders) and mark the permanently-unavailable
 * ref_cache border entries.
 *
 * @return 0 on success, -1 on allocation failure (free_tables() will
 *         release any partial allocation later)
 */
static int context_init(H264Context *h){
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

    /* these scan8 positions lie outside any partition and never become valid */
    h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
    h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

    return 0;
fail:
    return -1; // free_tables will clean up for us
}
01024 
01025 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
01026 
/**
 * One-time context setup shared by all decoder entry points: copy
 * dimensions/codec id from the AVCodecContext, initialize the H.264 DSP
 * and intra-prediction function tables for the 8-bit 4:2:0 default, and
 * seed flat (all 16) scaling matrices so dequantization works even
 * before a PPS with scaling lists is parsed.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    /* depth/chroma default to 8-bit 4:2:0; re-initialized if the SPS differs */
    ff_h264dsp_init(&h->h264dsp, 8, 1);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);

    h->dequant_coeff_pps= -1;  /* no PPS has provided dequant coeffs yet */
    s->unrestricted_mv=1;

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* flat default scaling matrices (value 16 == unity scale) */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
01045 
01046 int ff_h264_decode_extradata(H264Context *h)
01047 {
01048     AVCodecContext *avctx = h->s.avctx;
01049 
01050     if(avctx->extradata[0] == 1){
01051         int i, cnt, nalsize;
01052         unsigned char *p = avctx->extradata;
01053 
01054         h->is_avc = 1;
01055 
01056         if(avctx->extradata_size < 7) {
01057             av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
01058             return -1;
01059         }
01060         /* sps and pps in the avcC always have length coded with 2 bytes,
01061            so put a fake nal_length_size = 2 while parsing them */
01062         h->nal_length_size = 2;
01063         // Decode sps from avcC
01064         cnt = *(p+5) & 0x1f; // Number of sps
01065         p += 6;
01066         for (i = 0; i < cnt; i++) {
01067             nalsize = AV_RB16(p) + 2;
01068             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01069                 return -1;
01070             if(decode_nal_units(h, p, nalsize) < 0) {
01071                 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
01072                 return -1;
01073             }
01074             p += nalsize;
01075         }
01076         // Decode pps from avcC
01077         cnt = *(p++); // Number of pps
01078         for (i = 0; i < cnt; i++) {
01079             nalsize = AV_RB16(p) + 2;
01080             if (p - avctx->extradata + nalsize > avctx->extradata_size)
01081                 return -1;
01082             if (decode_nal_units(h, p, nalsize) < 0) {
01083                 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
01084                 return -1;
01085             }
01086             p += nalsize;
01087         }
01088         // Now store right nal length size, that will be use to parse all other nals
01089         h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
01090     } else {
01091         h->is_avc = 0;
01092         if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
01093             return -1;
01094     }
01095     return 0;
01096 }
01097 
/**
 * AVCodec init entry point for the H.264 decoder: set up the shared
 * MpegEncContext, the VLC tables, default 8-bit state, POC bookkeeping,
 * and parse any extradata (avcC or Annex-B) present on the context.
 *
 * @return 0 on success, -1 if the extradata fails to parse
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;
    int i;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
//    s->decode_mb= ff_h263_decode_mb;
    s->quarter_sample = 1;
    /* NOTE: the next statement is the body of this if despite the
     * misleading indentation */
    if(!avctx->has_b_frames)
    s->low_delay= 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* assume 8-bit until an SPS says otherwise */
    h->pixel_shift = 0;
    h->sps.bit_depth_luma = avctx->bits_per_raw_sample = 8;

    h->thread_context[0] = h;
    h->outputed_poc = h->next_outputed_poc = INT_MIN;
    for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
        h->last_pocs[i] = INT_MIN;
    h->prev_poc_msb= 1<<16;
    h->x264_build = -1;
    ff_h264_reset_sei(h);
    if(avctx->codec_id == CODEC_ID_H264){
        /* H.264 timestamps tick per field: double the time base once */
        if(avctx->ticks_per_frame == 1){
            s->avctx->time_base.den *=2;
        }
        avctx->ticks_per_frame = 2;
    }

    if(avctx->extradata_size > 0 && avctx->extradata &&
        ff_h264_decode_extradata(h))
        return -1;

    /* extradata may have carried an SPS announcing the reorder depth */
    if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
        s->avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay = 0;
    }

    return 0;
}
01149 
/* true if pointer a lies inside the object of 'size' bytes starting at b */
#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))
/**
 * Rebase an array of Picture pointers from one MpegEncContext's picture
 * storage onto another's (used when copying state between frame-thread
 * contexts).  NULL entries are preserved.
 *
 * The assert is a debug-only sanity check that each source pointer really
 * points into old_base or its picture array.
 */
static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)
{
    int i;

    for (i=0; i<count; i++){
        assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||
                IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||
                !from[i]));
        to[i] = REBASE_PICTURE(from[i], new_base, old_base);
    }
}
01162 
/**
 * Synchronize an array of heap-allocated parameter sets (SPS/PPS) from
 * @p from into @p to: entries deleted in the source are freed in the
 * destination, entries new in the source are allocated, and every present
 * entry is copied by value.
 *
 * Fix: the av_malloc() result is now checked — previously an allocation
 * failure led to memcpy() into a NULL pointer (undefined behavior).  On
 * OOM the destination slot is simply left NULL.
 *
 * @param count number of slots in both arrays
 * @param size  byte size of one parameter-set struct
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i=0; i<count; i++){
        if (to[i] && !from[i]) av_freep(&to[i]);
        else if (from[i] && !to[i]) to[i] = av_malloc(size);

        /* copy only when the destination actually exists (av_malloc may fail) */
        if (from[i] && to[i]) memcpy(to[i], from[i], size);
    }
}
01174 
01175 static int decode_init_thread_copy(AVCodecContext *avctx){
01176     H264Context *h= avctx->priv_data;
01177 
01178     if (!avctx->internal->is_copy)
01179         return 0;
01180     memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
01181     memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
01182 
01183     return 0;
01184 }
01185 
/* bulk-copy the byte range [start_field, end_field) between two structs of
 * the same type — relies on the fields being laid out contiguously */
#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)
/**
 * Frame-threading state hand-off: copy everything the next frame thread
 * needs (SPS/PPS, dequant tables, POC state, reference lists) from the
 * previous thread's context @p src into @p dst, performing one-time table
 * allocation if dst was never initialized.
 *
 * @return 0 on success, a negative error code on failure
 */
static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){
    H264Context *h= dst->priv_data, *h1= src->priv_data;
    MpegEncContext * const s = &h->s, * const s1 = &h1->s;
    int inited = s->context_initialized, err;
    int i;

    if(dst == src || !s1->context_initialized) return 0;

    err = ff_mpeg_update_thread_context(dst, src);
    if(err) return err;

    //FIXME handle width/height changing
    if(!inited){
        /* first hand-off to this context: drop any stale parameter sets,
         * clone all non-MpegEncContext fields wholesale, then allocate
         * this context's own tables */
        for(i = 0; i < MAX_SPS_COUNT; i++)
            av_freep(h->sps_buffers + i);

        for(i = 0; i < MAX_PPS_COUNT; i++)
            av_freep(h->pps_buffers + i);

        memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
        if (ff_h264_alloc_tables(h) < 0) {
            av_log(dst, AV_LOG_ERROR, "Could not allocate memory for h264\n");
            return AVERROR(ENOMEM);
        }
        context_init(h);

        /* the wholesale memcpy above aliased src's RBSP buffers; detach them */
        for(i=0; i<2; i++){
            h->rbsp_buffer[i] = NULL;
            h->rbsp_buffer_size[i] = 0;
        }

        h->thread_context[0] = h;

        // frame_start may not be called for the next thread (if it's decoding a bottom field)
        // so this has to be allocated here
        h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));
    }

    //extradata/NAL handling
    h->is_avc          = h1->is_avc;

    //SPS/PPS
    copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));
    h->sps             = h1->sps;
    copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));
    h->pps             = h1->pps;

    //Dequantization matrices
    //FIXME these are big - can they be only copied when PPS changes?
    copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

    /* rebase the dequant pointers onto this context's own buffers,
     * preserving any sharing established in init_dequant*_coeff_table() */
    for(i=0; i<6; i++)
        h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

    for(i=0; i<6; i++)
        h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

    h->dequant_coeff_pps = h1->dequant_coeff_pps;

    //POC timing
    copy_fields(h, h1, poc_lsb, redundant_pic_count);

    //reference lists
    copy_fields(h, h1, ref_count, list_count);
    copy_fields(h, h1, ref_list,  intra_gb);
    copy_fields(h, h1, short_ref, cabac_init_idc);

    /* the copied Picture pointers still point into src's picture array */
    copy_picture_range(h->short_ref,   h1->short_ref,   32, s, s1);
    copy_picture_range(h->long_ref,    h1->long_ref,    32, s, s1);
    copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

    h->last_slice_type = h1->last_slice_type;

    if(!s->current_picture_ptr) return 0;

    if(!s->dropable) {
        err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
        h->prev_poc_msb     = h->poc_msb;
        h->prev_poc_lsb     = h->poc_lsb;
    }
    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num       = h->frame_num;
    h->outputed_poc         = h->next_outputed_poc;

    return err;
}
01278 
/**
 * Per-frame setup: start the MpegEncContext frame, reset keyframe/MMCO
 * state on the new picture, precompute the per-4x4-block destination
 * offsets for the current linesizes, and lazily allocate each slice
 * thread's bipred scratchpad (linesize is not known at alloc_tables time).
 *
 * @return 0 on success, -1 if MPV_frame_start() fails
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->f.key_frame = 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    /* entries [0..47]: luma then chroma offsets with the normal stride;
     * entries [48..95]: the same with doubled stride — NOTE(review): the
     * 8*linesize factor suggests these serve field/MBAFF access, confirm */
    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->slice_context_count; i++)
        if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
            h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode = (s->flags & CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.f.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->f.reference = 0;

    /* field POCs are filled in per slice; INT_MAX marks "not yet decoded" */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}
01338 
01347 static void decode_postinit(H264Context *h, int setup_finished){
01348     MpegEncContext * const s = &h->s;
01349     Picture *out = s->current_picture_ptr;
01350     Picture *cur = s->current_picture_ptr;
01351     int i, pics, out_of_order, out_idx;
01352     int invalid = 0, cnt = 0;
01353 
01354     s->current_picture_ptr->f.qscale_type = FF_QSCALE_TYPE_H264;
01355     s->current_picture_ptr->f.pict_type   = s->pict_type;
01356 
01357     if (h->next_output_pic) return;
01358 
01359     if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
01360         //FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.
01361         //If we have one field per packet, we can. The check in decode_nal_units() is not good enough
01362         //to find this yet, so we assume the worst for now.
01363         //if (setup_finished)
01364         //    ff_thread_finish_setup(s->avctx);
01365         return;
01366     }
01367 
01368     cur->f.interlaced_frame = 0;
01369     cur->f.repeat_pict      = 0;
01370 
01371     /* Signal interlacing information externally. */
01372     /* Prioritize picture timing SEI information over used decoding process if it exists. */
01373 
01374     if(h->sps.pic_struct_present_flag){
01375         switch (h->sei_pic_struct)
01376         {
01377         case SEI_PIC_STRUCT_FRAME:
01378             break;
01379         case SEI_PIC_STRUCT_TOP_FIELD:
01380         case SEI_PIC_STRUCT_BOTTOM_FIELD:
01381             cur->f.interlaced_frame = 1;
01382             break;
01383         case SEI_PIC_STRUCT_TOP_BOTTOM:
01384         case SEI_PIC_STRUCT_BOTTOM_TOP:
01385             if (FIELD_OR_MBAFF_PICTURE)
01386                 cur->f.interlaced_frame = 1;
01387             else
01388                 // try to flag soft telecine progressive
01389                 cur->f.interlaced_frame = h->prev_interlaced_frame;
01390             break;
01391         case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
01392         case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
01393             // Signal the possibility of telecined film externally (pic_struct 5,6)
01394             // From these hints, let the applications decide if they apply deinterlacing.
01395             cur->f.repeat_pict = 1;
01396             break;
01397         case SEI_PIC_STRUCT_FRAME_DOUBLING:
01398             // Force progressive here, as doubling interlaced frame is a bad idea.
01399             cur->f.repeat_pict = 2;
01400             break;
01401         case SEI_PIC_STRUCT_FRAME_TRIPLING:
01402             cur->f.repeat_pict = 4;
01403             break;
01404         }
01405 
01406         if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
01407             cur->f.interlaced_frame = (h->sei_ct_type & (1 << 1)) != 0;
01408     }else{
01409         /* Derive interlacing flag from used decoding process. */
01410         cur->f.interlaced_frame = FIELD_OR_MBAFF_PICTURE;
01411     }
01412     h->prev_interlaced_frame = cur->f.interlaced_frame;
01413 
01414     if (cur->field_poc[0] != cur->field_poc[1]){
01415         /* Derive top_field_first from field pocs. */
01416         cur->f.top_field_first = cur->field_poc[0] < cur->field_poc[1];
01417     }else{
01418         if (cur->f.interlaced_frame || h->sps.pic_struct_present_flag) {
01419             /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
01420             if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
01421               || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
01422                 cur->f.top_field_first = 1;
01423             else
01424                 cur->f.top_field_first = 0;
01425         }else{
01426             /* Most likely progressive */
01427             cur->f.top_field_first = 0;
01428         }
01429     }
01430 
01431     //FIXME do something with unavailable reference frames
01432 
01433     /* Sort B-frames into display order */
01434 
01435     if(h->sps.bitstream_restriction_flag
01436        && s->avctx->has_b_frames < h->sps.num_reorder_frames){
01437         s->avctx->has_b_frames = h->sps.num_reorder_frames;
01438         s->low_delay = 0;
01439     }
01440 
01441     if(   s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
01442        && !h->sps.bitstream_restriction_flag){
01443         s->avctx->has_b_frames = MAX_DELAYED_PIC_COUNT - 1;
01444         s->low_delay= 0;
01445     }
01446 
01447     pics = 0;
01448     while(h->delayed_pic[pics]) pics++;
01449 
01450     assert(pics <= MAX_DELAYED_PIC_COUNT);
01451 
01452     h->delayed_pic[pics++] = cur;
01453     if (cur->f.reference == 0)
01454         cur->f.reference = DELAYED_PIC_REF;
01455 
01456     /* Frame reordering. This code takes pictures from coding order and sorts
01457      * them by their incremental POC value into display order. It supports POC
01458      * gaps, MMCO reset codes and random resets.
01459      * A "display group" can start either with a IDR frame (f.key_frame = 1),
01460      * and/or can be closed down with a MMCO reset code. In sequences where
01461      * there is no delay, we can't detect that (since the frame was already
01462      * output to the user), so we also set h->mmco_reset to detect the MMCO
01463      * reset code.
01464      * FIXME: if we detect insufficient delays (as per s->avctx->has_b_frames),
01465      * we increase the delay between input and output. All frames affected by
01466      * the lag (e.g. those that should have been output before another frame
01467      * that we already returned to the user) will be dropped. This is a bug
01468      * that we will fix later. */
01469     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++) {
01470         cnt     += out->poc < h->last_pocs[i];
01471         invalid += out->poc == INT_MIN;
01472     }
01473     if (!h->mmco_reset && !cur->f.key_frame && cnt + invalid == MAX_DELAYED_PIC_COUNT && cnt > 0) {
01474         h->mmco_reset = 2;
01475         if (pics > 1)
01476             h->delayed_pic[pics - 2]->mmco_reset = 2;
01477     }
01478     if (h->mmco_reset || cur->f.key_frame) {
01479         for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
01480             h->last_pocs[i] = INT_MIN;
01481         cnt     = 0;
01482         invalid = MAX_DELAYED_PIC_COUNT;
01483     }
01484     out = h->delayed_pic[0];
01485     out_idx = 0;
01486     for (i = 1; i < MAX_DELAYED_PIC_COUNT && h->delayed_pic[i] &&
01487          !h->delayed_pic[i-1]->mmco_reset && !h->delayed_pic[i]->f.key_frame; i++)
01488     {
01489         if(h->delayed_pic[i]->poc < out->poc){
01490             out = h->delayed_pic[i];
01491             out_idx = i;
01492         }
01493     }
01494     if (s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->f.key_frame || h->mmco_reset))
01495         h->next_outputed_poc = INT_MIN;
01496     out_of_order = !out->f.key_frame && !h->mmco_reset && (out->poc < h->next_outputed_poc);
01497 
01498     if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
01499         { }
01500     else if (out_of_order && pics-1 == s->avctx->has_b_frames &&
01501              s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT) {
01502         if (invalid + cnt < MAX_DELAYED_PIC_COUNT) {
01503             s->avctx->has_b_frames = FFMAX(s->avctx->has_b_frames, cnt);
01504         }
01505         s->low_delay = 0;
01506     } else if (s->low_delay &&
01507                ((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2) ||
01508                 cur->f.pict_type == AV_PICTURE_TYPE_B)) {
01509         s->low_delay = 0;
01510         s->avctx->has_b_frames++;
01511     }
01512 
01513     if(pics > s->avctx->has_b_frames){
01514         out->f.reference &= ~DELAYED_PIC_REF;
01515         out->owner2 = s; // for frame threading, the owner must be the second field's thread
01516                          // or else the first thread can release the picture and reuse it unsafely
01517         for(i=out_idx; h->delayed_pic[i]; i++)
01518             h->delayed_pic[i] = h->delayed_pic[i+1];
01519     }
01520     memmove(h->last_pocs, &h->last_pocs[1], sizeof(*h->last_pocs) * (MAX_DELAYED_PIC_COUNT - 1));
01521     h->last_pocs[MAX_DELAYED_PIC_COUNT - 1] = cur->poc;
01522     if(!out_of_order && pics > s->avctx->has_b_frames){
01523         h->next_output_pic = out;
01524         if (out->mmco_reset) {
01525             if (out_idx > 0) {
01526                 h->next_outputed_poc = out->poc;
01527                 h->delayed_pic[out_idx - 1]->mmco_reset = out->mmco_reset;
01528             } else {
01529                 h->next_outputed_poc = INT_MIN;
01530             }
01531         } else {
01532             if (out_idx == 0 && pics > 1 && h->delayed_pic[0]->f.key_frame) {
01533                 h->next_outputed_poc = INT_MIN;
01534             } else {
01535                 h->next_outputed_poc = out->poc;
01536             }
01537         }
01538         h->mmco_reset = 0;
01539     }else{
01540         av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");
01541     }
01542 
01543     if (setup_finished)
01544         ff_thread_finish_setup(s->avctx);
01545 }
01546 
01547 static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y,
01548                                               uint8_t *src_cb, uint8_t *src_cr,
01549                                               int linesize, int uvlinesize, int simple)
01550 {
01551     MpegEncContext * const s = &h->s;
01552     uint8_t *top_border;
01553     int top_idx = 1;
01554     const int pixel_shift = h->pixel_shift;
01555     int chroma444 = CHROMA444;
01556     int chroma422 = CHROMA422;
01557 
01558     src_y  -=   linesize;
01559     src_cb -= uvlinesize;
01560     src_cr -= uvlinesize;
01561 
01562     if(!simple && FRAME_MBAFF){
01563         if(s->mb_y&1){
01564             if(!MB_MBAFF){
01565                 top_border = h->top_borders[0][s->mb_x];
01566                 AV_COPY128(top_border, src_y + 15*linesize);
01567                 if (pixel_shift)
01568                     AV_COPY128(top_border+16, src_y+15*linesize+16);
01569                 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01570                     if(chroma444){
01571                         if (pixel_shift){
01572                             AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
01573                             AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
01574                             AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
01575                             AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
01576                         } else {
01577                             AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
01578                             AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
01579                         }
01580                     } else if(chroma422) {
01581                         if (pixel_shift) {
01582                             AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
01583                             AV_COPY128(top_border+48, src_cr + 15*uvlinesize);
01584                         } else {
01585                             AV_COPY64(top_border+16, src_cb +  15*uvlinesize);
01586                             AV_COPY64(top_border+24, src_cr +  15*uvlinesize);
01587                         }
01588                     } else {
01589                         if (pixel_shift) {
01590                             AV_COPY128(top_border+32, src_cb+7*uvlinesize);
01591                             AV_COPY128(top_border+48, src_cr+7*uvlinesize);
01592                         } else {
01593                             AV_COPY64(top_border+16, src_cb+7*uvlinesize);
01594                             AV_COPY64(top_border+24, src_cr+7*uvlinesize);
01595                         }
01596                     }
01597                 }
01598             }
01599         }else if(MB_MBAFF){
01600             top_idx = 0;
01601         }else
01602             return;
01603     }
01604 
01605     top_border = h->top_borders[top_idx][s->mb_x];
01606     // There are two lines saved, the line above the the top macroblock of a pair,
01607     // and the line above the bottom macroblock
01608     AV_COPY128(top_border, src_y + 16*linesize);
01609     if (pixel_shift)
01610         AV_COPY128(top_border+16, src_y+16*linesize+16);
01611 
01612     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01613         if(chroma444){
01614             if (pixel_shift){
01615                 AV_COPY128(top_border+32, src_cb + 16*linesize);
01616                 AV_COPY128(top_border+48, src_cb + 16*linesize+16);
01617                 AV_COPY128(top_border+64, src_cr + 16*linesize);
01618                 AV_COPY128(top_border+80, src_cr + 16*linesize+16);
01619             } else {
01620                 AV_COPY128(top_border+16, src_cb + 16*linesize);
01621                 AV_COPY128(top_border+32, src_cr + 16*linesize);
01622             }
01623         } else if(chroma422) {
01624             if (pixel_shift) {
01625                 AV_COPY128(top_border+32, src_cb+16*uvlinesize);
01626                 AV_COPY128(top_border+48, src_cr+16*uvlinesize);
01627             } else {
01628                 AV_COPY64(top_border+16, src_cb+16*uvlinesize);
01629                 AV_COPY64(top_border+24, src_cr+16*uvlinesize);
01630             }
01631         } else {
01632             if (pixel_shift) {
01633                 AV_COPY128(top_border+32, src_cb+8*uvlinesize);
01634                 AV_COPY128(top_border+48, src_cr+8*uvlinesize);
01635             } else {
01636                 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
01637                 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
01638             }
01639         }
01640     }
01641 }
01642 
01643 static av_always_inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
01644                                   uint8_t *src_cb, uint8_t *src_cr,
01645                                   int linesize, int uvlinesize,
01646                                   int xchg, int chroma444,
01647                                   int simple, int pixel_shift){
01648     MpegEncContext * const s = &h->s;
01649     int deblock_topleft;
01650     int deblock_top;
01651     int top_idx = 1;
01652     uint8_t *top_border_m1;
01653     uint8_t *top_border;
01654 
01655     if(!simple && FRAME_MBAFF){
01656         if(s->mb_y&1){
01657             if(!MB_MBAFF)
01658                 return;
01659         }else{
01660             top_idx = MB_MBAFF ? 0 : 1;
01661         }
01662     }
01663 
01664     if(h->deblocking_filter == 2) {
01665         deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
01666         deblock_top     = h->top_type;
01667     } else {
01668         deblock_topleft = (s->mb_x > 0);
01669         deblock_top     = (s->mb_y > !!MB_FIELD);
01670     }
01671 
01672     src_y  -=   linesize + 1 + pixel_shift;
01673     src_cb -= uvlinesize + 1 + pixel_shift;
01674     src_cr -= uvlinesize + 1 + pixel_shift;
01675 
01676     top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
01677     top_border    = h->top_borders[top_idx][s->mb_x];
01678 
01679 #define XCHG(a,b,xchg)\
01680     if (pixel_shift) {\
01681         if (xchg) {\
01682             AV_SWAP64(b+0,a+0);\
01683             AV_SWAP64(b+8,a+8);\
01684         } else {\
01685             AV_COPY128(b,a); \
01686         }\
01687     } else \
01688 if (xchg) AV_SWAP64(b,a);\
01689 else      AV_COPY64(b,a);
01690 
01691     if(deblock_top){
01692         if(deblock_topleft){
01693             XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
01694         }
01695         XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
01696         XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
01697         if(s->mb_x+1 < s->mb_width){
01698             XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
01699         }
01700     }
01701     if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01702         if(chroma444){
01703             if(deblock_topleft){
01704                 XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
01705                 XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
01706             }
01707             XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
01708             XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
01709             XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
01710             XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
01711             if(s->mb_x+1 < s->mb_width){
01712                 XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
01713                 XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
01714             }
01715         } else {
01716             if(deblock_top){
01717                 if(deblock_topleft){
01718                     XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
01719                     XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
01720                 }
01721                 XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
01722                 XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
01723             }
01724         }
01725     }
01726 }
01727 
01728 static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
01729     if (high_bit_depth) {
01730         return AV_RN32A(((int32_t*)mb) + index);
01731     } else
01732         return AV_RN16A(mb + index);
01733 }
01734 
01735 static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
01736     if (high_bit_depth) {
01737         AV_WN32A(((int32_t*)mb) + index, value);
01738     } else
01739         AV_WN16A(mb + index, value);
01740 }
01741 
01742 static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
01743                                                        int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
01744 {
01745     MpegEncContext * const s = &h->s;
01746     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
01747     void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
01748     int i;
01749     int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
01750     block_offset += 16*p;
01751     if(IS_INTRA4x4(mb_type)){
01752         if(simple || !s->encoding){
01753             if(IS_8x8DCT(mb_type)){
01754                 if(transform_bypass){
01755                     idct_dc_add =
01756                     idct_add    = s->dsp.add_pixels8;
01757                 }else{
01758                     idct_dc_add = h->h264dsp.h264_idct8_dc_add;
01759                     idct_add    = h->h264dsp.h264_idct8_add;
01760                 }
01761                 for(i=0; i<16; i+=4){
01762                     uint8_t * const ptr= dest_y + block_offset[i];
01763                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
01764                     if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
01765                         h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01766                     }else{
01767                         const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
01768                         h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
01769                                                     (h->topright_samples_available<<i)&0x4000, linesize);
01770                         if(nnz){
01771                             if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
01772                                 idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01773                             else
01774                                 idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01775                         }
01776                     }
01777                 }
01778             }else{
01779                 if(transform_bypass){
01780                     idct_dc_add =
01781                     idct_add    = s->dsp.add_pixels4;
01782                 }else{
01783                     idct_dc_add = h->h264dsp.h264_idct_dc_add;
01784                     idct_add    = h->h264dsp.h264_idct_add;
01785                 }
01786                 for(i=0; i<16; i++){
01787                     uint8_t * const ptr= dest_y + block_offset[i];
01788                     const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
01789 
01790                     if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
01791                         h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01792                     }else{
01793                         uint8_t *topright;
01794                         int nnz, tr;
01795                         uint64_t tr_high;
01796                         if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
01797                             const int topright_avail= (h->topright_samples_available<<i)&0x8000;
01798                             assert(s->mb_y || linesize <= block_offset[i]);
01799                             if(!topright_avail){
01800                                 if (pixel_shift) {
01801                                     tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
01802                                     topright= (uint8_t*) &tr_high;
01803                                 } else {
01804                                     tr= ptr[3 - linesize]*0x01010101u;
01805                                     topright= (uint8_t*) &tr;
01806                                 }
01807                             }else
01808                                 topright= ptr + (4 << pixel_shift) - linesize;
01809                         }else
01810                             topright= NULL;
01811 
01812                         h->hpc.pred4x4[ dir ](ptr, topright, linesize);
01813                         nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
01814                         if(nnz){
01815                             if(is_h264){
01816                                 if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
01817                                     idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01818                                 else
01819                                     idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
01820                             } else if (CONFIG_SVQ3_DECODER)
01821                                 ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
01822                         }
01823                     }
01824                 }
01825             }
01826         }
01827     }else{
01828         h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
01829         if(is_h264){
01830             if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
01831                 if(!transform_bypass)
01832                     h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
01833                 else{
01834                     static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
01835                                                             8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
01836                     for(i = 0; i < 16; i++)
01837                         dctcoef_set(h->mb+(p*256 << pixel_shift), pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
01838                 }
01839             }
01840         } else if (CONFIG_SVQ3_DECODER)
01841             ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
01842     }
01843 }
01844 
01845 static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
01846                                                     int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
01847 {
01848     MpegEncContext * const s = &h->s;
01849     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
01850     int i;
01851     block_offset += 16*p;
01852     if(!IS_INTRA4x4(mb_type)){
01853         if(is_h264){
01854             if(IS_INTRA16x16(mb_type)){
01855                 if(transform_bypass){
01856                     if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
01857                         h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
01858                     }else{
01859                         for(i=0; i<16; i++){
01860                             if(h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
01861                                 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
01862                         }
01863                     }
01864                 }else{
01865                     h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
01866                 }
01867             }else if(h->cbp&15){
01868                 if(transform_bypass){
01869                     const int di = IS_8x8DCT(mb_type) ? 4 : 1;
01870                     idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
01871                     for(i=0; i<16; i+=di){
01872                         if(h->non_zero_count_cache[ scan8[i+p*16] ]){
01873                             idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
01874                         }
01875                     }
01876                 }else{
01877                     if(IS_8x8DCT(mb_type)){
01878                         h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
01879                     }else{
01880                         h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
01881                     }
01882                 }
01883             }
01884         } else if (CONFIG_SVQ3_DECODER) {
01885             for(i=0; i<16; i++){
01886                 if(h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]){ //FIXME benchmark weird rule, & below
01887                     uint8_t * const ptr= dest_y + block_offset[i];
01888                     ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
01889                 }
01890             }
01891         }
01892     }
01893 }
01894 
01895 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
01896 {
01897     MpegEncContext * const s = &h->s;
01898     const int mb_x= s->mb_x;
01899     const int mb_y= s->mb_y;
01900     const int mb_xy= h->mb_xy;
01901     const int mb_type = s->current_picture.f.mb_type[mb_xy];
01902     uint8_t  *dest_y, *dest_cb, *dest_cr;
01903     int linesize, uvlinesize /*dct_offset*/;
01904     int i, j;
01905     int *block_offset = &h->block_offset[0];
01906     const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
01907     /* is_h264 should always be true if SVQ3 is disabled. */
01908     const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
01909     void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
01910     const int block_h = 16 >> s->chroma_y_shift;
01911     const int chroma422 = CHROMA422;
01912 
01913     dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
01914     dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
01915     dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h;
01916 
01917     s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
01918     s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2);
01919 
01920     h->list_counts[mb_xy]= h->list_count;
01921 
01922     if (!simple && MB_FIELD) {
01923         linesize   = h->mb_linesize   = s->linesize * 2;
01924         uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
01925         block_offset = &h->block_offset[48];
01926         if(mb_y&1){ //FIXME move out of this function?
01927             dest_y -= s->linesize*15;
01928             dest_cb-= s->uvlinesize * (block_h - 1);
01929             dest_cr-= s->uvlinesize * (block_h - 1);
01930         }
01931         if(FRAME_MBAFF) {
01932             int list;
01933             for(list=0; list<h->list_count; list++){
01934                 if(!USES_LIST(mb_type, list))
01935                     continue;
01936                 if(IS_16X16(mb_type)){
01937                     int8_t *ref = &h->ref_cache[list][scan8[0]];
01938                     fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
01939                 }else{
01940                     for(i=0; i<16; i+=4){
01941                         int ref = h->ref_cache[list][scan8[i]];
01942                         if(ref >= 0)
01943                             fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
01944                     }
01945                 }
01946             }
01947         }
01948     } else {
01949         linesize   = h->mb_linesize   = s->linesize;
01950         uvlinesize = h->mb_uvlinesize = s->uvlinesize;
01951 //        dct_offset = s->linesize * 16;
01952     }
01953 
01954     if (!simple && IS_INTRA_PCM(mb_type)) {
01955         if (pixel_shift) {
01956             const int bit_depth = h->sps.bit_depth_luma;
01957             int j;
01958             GetBitContext gb;
01959             init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth);
01960 
01961             for (i = 0; i < 16; i++) {
01962                 uint16_t *tmp_y  = (uint16_t*)(dest_y  + i*linesize);
01963                 for (j = 0; j < 16; j++)
01964                     tmp_y[j] = get_bits(&gb, bit_depth);
01965             }
01966             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01967                 if (!h->sps.chroma_format_idc) {
01968                     for (i = 0; i < block_h; i++) {
01969                         uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01970                         for (j = 0; j < 8; j++) {
01971                             tmp_cb[j] = 1 << (bit_depth - 1);
01972                         }
01973                     }
01974                     for (i = 0; i < block_h; i++) {
01975                         uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01976                         for (j = 0; j < 8; j++) {
01977                             tmp_cr[j] = 1 << (bit_depth - 1);
01978                         }
01979                     }
01980                 } else {
01981                     for (i = 0; i < block_h; i++) {
01982                         uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize);
01983                         for (j = 0; j < 8; j++)
01984                             tmp_cb[j] = get_bits(&gb, bit_depth);
01985                     }
01986                     for (i = 0; i < block_h; i++) {
01987                         uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize);
01988                         for (j = 0; j < 8; j++)
01989                             tmp_cr[j] = get_bits(&gb, bit_depth);
01990                     }
01991                 }
01992             }
01993         } else {
01994             for (i=0; i<16; i++) {
01995                 memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
01996             }
01997             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
01998                 if (!h->sps.chroma_format_idc) {
01999                     for (i = 0; i < block_h; i++) {
02000                         memset(dest_cb + i*uvlinesize, 128, 8);
02001                         memset(dest_cr + i*uvlinesize, 128, 8);
02002                     }
02003                 } else {
02004                     for (i = 0; i < block_h; i++) {
02005                         memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4,  8);
02006                         memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4,  8);
02007                     }
02008                 }
02009             }
02010         }
02011     } else {
02012         if(IS_INTRA(mb_type)){
02013             if(h->deblocking_filter)
02014                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);
02015 
02016             if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
02017                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
02018                 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
02019             }
02020 
02021             hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
02022 
02023             if(h->deblocking_filter)
02024                 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
02025         }else if(is_h264){
02026             if (chroma422) {
02027                 hl_motion_422(h, dest_y, dest_cb, dest_cr,
02028                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
02029                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
02030                               h->h264dsp.weight_h264_pixels_tab,
02031                               h->h264dsp.biweight_h264_pixels_tab,
02032                               pixel_shift);
02033             } else {
02034                 hl_motion_420(h, dest_y, dest_cb, dest_cr,
02035                               s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
02036                               s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
02037                               h->h264dsp.weight_h264_pixels_tab,
02038                               h->h264dsp.biweight_h264_pixels_tab,
02039                               pixel_shift);
02040             }
02041         }
02042 
02043         hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift, block_offset, linesize, dest_y, 0);
02044 
02045         if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
02046             uint8_t *dest[2] = {dest_cb, dest_cr};
02047             if(transform_bypass){
02048                 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
02049                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16*1 << pixel_shift), uvlinesize);
02050                     h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32, h->mb + (16*16*2 << pixel_shift), uvlinesize);
02051                 }else{
02052                     idct_add = s->dsp.add_pixels4;
02053                     for(j=1; j<3; j++){
02054                         for(i=j*16; i<j*16+4; i++){
02055                             if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h->mb, pixel_shift, i*16))
02056                                 idct_add   (dest[j-1] + block_offset[i], h->mb + (i*16 << pixel_shift), uvlinesize);
02057                         }
02058                         if (chroma422) {
02059                             for(i=j*16+4; i<j*16+8; i++){
02060                                 if(h->non_zero_count_cache[ scan8[i+4] ] || dctcoef_get(h->mb, pixel_shift, i*16))
02061                                     idct_add   (dest[j-1] + block_offset[i+4], h->mb + (i*16 << pixel_shift), uvlinesize);
02062                             }
02063                         }
02064                     }
02065                 }
02066             }else{
02067                 if(is_h264){
02068                     int qp[2];
02069                     if (chroma422) {
02070                         qp[0] = h->chroma_qp[0] + 3;
02071                         qp[1] = h->chroma_qp[1] + 3;
02072                     } else {
02073                         qp[0] = h->chroma_qp[0];
02074                         qp[1] = h->chroma_qp[1];
02075                     }
02076                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
02077                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]);
02078                     if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
02079                         h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]);
02080                     h->h264dsp.h264_idct_add8(dest, block_offset,
02081                                               h->mb, uvlinesize,
02082                                               h->non_zero_count_cache);
02083                 } else if (CONFIG_SVQ3_DECODER) {
02084                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*1, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
02085                     h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*2, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
02086                     for(j=1; j<3; j++){
02087                         for(i=j*16; i<j*16+4; i++){
02088                             if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
02089                                 uint8_t * const ptr= dest[j-1] + block_offset[i];
02090                                 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
02091                             }
02092                         }
02093                     }
02094                 }
02095             }
02096         }
02097     }
02098     if(h->cbp || IS_INTRA(mb_type))
02099     {
02100         s->dsp.clear_blocks(h->mb);
02101         s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
02102     }
02103 }
02104 
/**
 * Decode one macroblock for 4:4:4 content.
 *
 * In 4:4:4 all three planes are full-resolution 16x16 blocks sharing the
 * luma line size, so each plane goes through the luma prediction/IDCT path
 * (hl_decode_mb_predict_luma()/hl_decode_mb_idct_luma() with plane index p).
 *
 * @param h           decoder context
 * @param simple      nonzero for the fast path: no gray-decoding, field,
 *                    MBAFF or PCM special cases are taken
 * @param pixel_shift 0 for 8-bit samples, 1 for 16-bit (high bit depth) samples
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= h->mb_xy;
    const int mb_type = s->current_picture.f.mb_type[mb_xy];
    uint8_t  *dest[3];
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    /* lossless bypass only exists on the complex path (simple implies qscale != 0) */
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* with CODEC_FLAG_GRAY only the luma plane is decoded */
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++)
    {
        /* destination of this MB in each plane; prefetch the next MBs */
        dest[p] = s->current_picture.f.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy]= h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: lines of one field are interleaved, so step two
         * picture rows per MB row and use the field block offsets */
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if(mb_y&1) //FIXME move out of this function?
            for (p = 0; p < 3; p++)
                dest[p] -= s->linesize*15;
        if(FRAME_MBAFF) {
            int list;
            /* remap frame reference indices to per-field indices:
             * (16+ref)^(mb_y&1) selects the matching field of each reference */
            for(list=0; list<h->list_count; list++){
                if(!USES_LIST(mb_type, list))
                    continue;
                if(IS_16X16(mb_type)){
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                }else{
                    for(i=0; i<16; i+=4){
                        int ref = h->ref_cache[list][scan8[i]];
                        if(ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        /* IPCM: raw samples were stored in h->mb by the parser; copy them out */
        if (pixel_shift) {
            /* high bit depth: samples are bit-packed, re-read them at bit_depth
             * bits each (3 planes * 256 samples = 768 per MB) */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t*)h->mb, 768*bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t*)(dest[p] + i*linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            /* 8-bit: h->mb holds int16_t coefficients, 8 of which span 16 bytes */
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    memcpy(dest[p] + i*linesize, h->mb + p*128 + i*8, 16);
                }
            }
        }
    } else {
        if(IS_INTRA(mb_type)){
            /* make unavailable neighbor samples accessible for intra prediction,
             * then restore the borders afterwards */
            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);

            if(h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        }else{
            /* inter: motion compensation for all three planes (chroma idx 3 = 4:4:4) */
            hl_motion(h, dest[0], dest[1], dest[2],
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                      h->h264dsp.weight_h264_pixels_tab,
                      h->h264dsp.biweight_h264_pixels_tab, pixel_shift, 3);
        }

        /* add the residual on top of the prediction, per plane */
        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift, block_offset, linesize, dest[p], p);
    }
    if(h->cbp || IS_INTRA(mb_type))
    {
        /* zero the coefficient buffer for the next macroblock */
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb+(24*16<<pixel_shift));
    }
}
02200 
#define hl_decode_mb_simple(sh, bits) \
static void hl_decode_mb_simple_ ## bits(H264Context *h){ \
    hl_decode_mb_internal(h, 1, sh); \
}
/* Instantiate the "simple" (no gray/field/MBAFF/PCM special-casing) decoders:
 * sh is the pixel shift (0 = 8-bit samples, 1 = 16-bit samples). */
hl_decode_mb_simple(0, 8)
hl_decode_mb_simple(1, 16)
02210 
static void av_noinline hl_decode_mb_complex(H264Context *h){
    /* simple=0: take the full path with gray/field/MBAFF/PCM handling */
    hl_decode_mb_internal(h, 0, h->pixel_shift);
}
02217 
/**
 * Decode a 4:4:4 macroblock with all special cases enabled (simple=0).
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h){
    hl_decode_mb_444_internal(h, 0, h->pixel_shift);
}
02221 
/**
 * Decode an 8-bit 4:4:4 macroblock on the fast path (simple=1, pixel_shift=0).
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h){
    hl_decode_mb_444_internal(h, 1, 0);
}
02225 
02226 void ff_h264_hl_decode_mb(H264Context *h){
02227     MpegEncContext * const s = &h->s;
02228     const int mb_xy= h->mb_xy;
02229     const int mb_type = s->current_picture.f.mb_type[mb_xy];
02230     int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
02231 
02232     if (CHROMA444) {
02233         if(is_complex || h->pixel_shift)
02234             hl_decode_mb_444_complex(h);
02235         else
02236             hl_decode_mb_444_simple(h);
02237     } else if (is_complex) {
02238         hl_decode_mb_complex(h);
02239     } else if (h->pixel_shift) {
02240         hl_decode_mb_simple_16(h);
02241     } else
02242         hl_decode_mb_simple_8(h);
02243 }
02244 
02245 static int pred_weight_table(H264Context *h){
02246     MpegEncContext * const s = &h->s;
02247     int list, i;
02248     int luma_def, chroma_def;
02249 
02250     h->use_weight= 0;
02251     h->use_weight_chroma= 0;
02252     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
02253     if(h->sps.chroma_format_idc)
02254         h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
02255     luma_def = 1<<h->luma_log2_weight_denom;
02256     chroma_def = 1<<h->chroma_log2_weight_denom;
02257 
02258     for(list=0; list<2; list++){
02259         h->luma_weight_flag[list]   = 0;
02260         h->chroma_weight_flag[list] = 0;
02261         for(i=0; i<h->ref_count[list]; i++){
02262             int luma_weight_flag, chroma_weight_flag;
02263 
02264             luma_weight_flag= get_bits1(&s->gb);
02265             if(luma_weight_flag){
02266                 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
02267                 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
02268                 if(   h->luma_weight[i][list][0] != luma_def
02269                    || h->luma_weight[i][list][1] != 0) {
02270                     h->use_weight= 1;
02271                     h->luma_weight_flag[list]= 1;
02272                 }
02273             }else{
02274                 h->luma_weight[i][list][0]= luma_def;
02275                 h->luma_weight[i][list][1]= 0;
02276             }
02277 
02278             if(h->sps.chroma_format_idc){
02279                 chroma_weight_flag= get_bits1(&s->gb);
02280                 if(chroma_weight_flag){
02281                     int j;
02282                     for(j=0; j<2; j++){
02283                         h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
02284                         h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
02285                         if(   h->chroma_weight[i][list][j][0] != chroma_def
02286                            || h->chroma_weight[i][list][j][1] != 0) {
02287                             h->use_weight_chroma= 1;
02288                             h->chroma_weight_flag[list]= 1;
02289                         }
02290                     }
02291                 }else{
02292                     int j;
02293                     for(j=0; j<2; j++){
02294                         h->chroma_weight[i][list][j][0]= chroma_def;
02295                         h->chroma_weight[i][list][j][1]= 0;
02296                     }
02297                 }
02298             }
02299         }
02300         if(h->slice_type_nos != AV_PICTURE_TYPE_B) break;
02301     }
02302     h->use_weight= h->use_weight || h->use_weight_chroma;
02303     return 0;
02304 }
02305 
static void implicit_weight_table(H264Context *h, int field){
    MpegEncContext * const s = &h->s;
    int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

    /* implicit mode never uses the explicit per-list flags */
    for (i = 0; i < 2; i++) {
        h->luma_weight_flag[i]   = 0;
        h->chroma_weight_flag[i] = 0;
    }

    if(field < 0){
        /* frame (or frame-level MBAFF) case: weights cover the full lists */
        if (s->picture_structure == PICT_FRAME) {
            cur_poc = s->current_picture_ptr->poc;
        } else {
            cur_poc = s->current_picture_ptr->field_poc[s->picture_structure - 1];
        }
    /* NOTE(review): this early-exit belongs to the field<0 branch despite its
     * indentation. With one reference per list and the current POC exactly
     * between them, implicit weights degenerate to the 32/32 default, so
     * weighting can be skipped entirely. */
    if(   h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
       && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
        h->use_weight= 0;
        h->use_weight_chroma= 0;
        return;
    }
        ref_start= 0;
        ref_count0= h->ref_count[0];
        ref_count1= h->ref_count[1];
    }else{
        /* per-field pass for MBAFF: field references live at offset 16,
         * interleaved two entries per frame reference */
        cur_poc = s->current_picture_ptr->field_poc[field];
        ref_start= 16;
        ref_count0= 16+2*h->ref_count[0];
        ref_count1= 16+2*h->ref_count[1];
    }

    /* 2 = implicit weighting, with a fixed denominator of 2^5 */
    h->use_weight= 2;
    h->use_weight_chroma= 2;
    h->luma_log2_weight_denom= 5;
    h->chroma_log2_weight_denom= 5;

    for(ref0=ref_start; ref0 < ref_count0; ref0++){
        int poc0 = h->ref_list[0][ref0].poc;
        for(ref1=ref_start; ref1 < ref_count1; ref1++){
            int w = 32; /* default: equal weighting */
            if (!h->ref_list[0][ref0].long_ref && !h->ref_list[1][ref1].long_ref) {
                /* temporal distance scaling, spec 8.4.2.3.2:
                 * td/tb clipped to [-128,127], DistScaleFactor in [-64,128] */
                int poc1 = h->ref_list[1][ref1].poc;
                int td = av_clip(poc1 - poc0, -128, 127);
                if(td){
                    int tb = av_clip(cur_poc - poc0, -128, 127);
                    int tx = (16384 + (FFABS(td) >> 1)) / td;
                    int dist_scale_factor = (tb*tx + 32) >> 8;
                    if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
                        w = 64 - dist_scale_factor;
                }
            }
            if(field<0){
                /* frame pass fills both field slots identically */
                h->implicit_weight[ref0][ref1][0]=
                h->implicit_weight[ref0][ref1][1]= w;
            }else{
                h->implicit_weight[ref0][ref1][field]=w;
            }
        }
    }
}
02371 
02375 static void idr(H264Context *h){
02376     ff_h264_remove_all_refs(h);
02377     h->prev_frame_num= 0;
02378     h->prev_frame_num_offset= 0;
02379     h->prev_poc_msb=
02380     h->prev_poc_lsb= 0;
02381 }
02382 
02383 /* forget old pics after a seek */
02384 static void flush_dpb(AVCodecContext *avctx){
02385     H264Context *h= avctx->priv_data;
02386     int i;
02387     for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
02388         if(h->delayed_pic[i])
02389             h->delayed_pic[i]->f.reference = 0;
02390         h->delayed_pic[i]= NULL;
02391     }
02392     for (i = 0; i < MAX_DELAYED_PIC_COUNT; i++)
02393         h->last_pocs[i] = INT_MIN;
02394     h->outputed_poc=h->next_outputed_poc= INT_MIN;
02395     h->prev_interlaced_frame = 1;
02396     idr(h);
02397     if(h->s.current_picture_ptr)
02398         h->s.current_picture_ptr->f.reference = 0;
02399     h->s.first_field= 0;
02400     ff_h264_reset_sei(h);
02401     ff_mpeg_flush(avctx);
02402 }
02403 
/**
 * Compute the picture order count of the current picture from the parsed
 * slice-header fields, per spec 8.2.1 (poc_type 0, 1 and 2).
 *
 * Fills cur->field_poc[] (only the fields present in this picture) and
 * sets cur->poc to the smaller of the two field POCs.
 *
 * @return 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    int field_poc[2];
    Picture *cur = s->current_picture_ptr;

    /* frame_num wrapped around since the previous picture */
    h->frame_num_offset= h->prev_frame_num_offset;
    if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if(h->sps.poc_type==0){
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        /* derive poc_msb from the lsb delta (spec 8.2.1.1): a large jump in
         * the lsb means the msb wrapped up or down */
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;
        field_poc[0] =
        field_poc[1] = h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    }else if(h->sps.poc_type==1){
        /* POC derived from frame_num and the SPS reference-frame offset cycle
         * (spec 8.2.1.2) */
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int i;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;
        else
            abs_frame_num = 0;

        /* non-reference pictures sit between the cycle's reference slots */
        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for(i=0; i < h->sps.poc_cycle_length; i++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for(i = 0; i <= frame_num_in_poc_cycle; i++)
                expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
        } else
            expectedpoc = 0;

        if(h->nal_ref_idc == 0)
            expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    }else{
        /* poc_type 2: POC follows decoding order, 2*frame_num, with
         * non-reference pictures placed one below (spec 8.2.1.3) */
        int poc= 2*(h->frame_num_offset + h->frame_num);

        if(!h->nal_ref_idc)
            poc--;

        field_poc[0]= poc;
        field_poc[1]= poc;
    }

    /* only store the POC(s) of the field(s) actually present */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}
02480 
02481 
02485 static void init_scan_tables(H264Context *h){
02486     int i;
02487     for(i=0; i<16; i++){
02488 #define T(x) (x>>2) | ((x<<2) & 0xF)
02489         h->zigzag_scan[i] = T(zigzag_scan[i]);
02490         h-> field_scan[i] = T( field_scan[i]);
02491 #undef T
02492     }
02493     for(i=0; i<64; i++){
02494 #define T(x) (x>>3) | ((x&7)<<3)
02495         h->zigzag_scan8x8[i]       = T(ff_zigzag_direct[i]);
02496         h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
02497         h->field_scan8x8[i]        = T(field_scan8x8[i]);
02498         h->field_scan8x8_cavlc[i]  = T(field_scan8x8_cavlc[i]);
02499 #undef T
02500     }
02501     if(h->sps.transform_bypass){ //FIXME same ugly
02502         h->zigzag_scan_q0          = zigzag_scan;
02503         h->zigzag_scan8x8_q0       = ff_zigzag_direct;
02504         h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
02505         h->field_scan_q0           = field_scan;
02506         h->field_scan8x8_q0        = field_scan8x8;
02507         h->field_scan8x8_cavlc_q0  = field_scan8x8_cavlc;
02508     }else{
02509         h->zigzag_scan_q0          = h->zigzag_scan;
02510         h->zigzag_scan8x8_q0       = h->zigzag_scan8x8;
02511         h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
02512         h->field_scan_q0           = h->field_scan;
02513         h->field_scan8x8_q0        = h->field_scan8x8;
02514         h->field_scan8x8_cavlc_q0  = h->field_scan8x8_cavlc;
02515     }
02516 }
02517 
/**
 * Finish decoding the current field (or frame): report completion to
 * frame-threading consumers, run reference picture marking, close the
 * hwaccel/VDPAU frame, and end the MPEG-layer frame.
 *
 * @param h        decoder context
 * @param in_setup nonzero when called while setting up the next frame in
 *                 frame-threaded decoding (skips progress report and defers
 *                 the ref-marking/POC bookkeeping to the setup path)
 * @return error code from reference picture marking, 0 on success
 */
static int field_end(H264Context *h, int in_setup){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int err = 0;
    s->mb_y= 0;

    /* tell waiting frame threads this field is fully decoded */
    if (!in_setup && !s->dropable)
        ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
                                  s->picture_structure == PICT_BOTTOM_FIELD);

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_set_reference_frames(s);

    if(in_setup || !(avctx->active_thread_type&FF_THREAD_FRAME)){
        /* commit reference marking and POC state for the next picture;
         * droppable (non-reference) pictures leave the DPB untouched */
        if(!s->dropable) {
            err = ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb= h->poc_msb;
            h->prev_poc_lsb= h->poc_lsb;
        }
        h->prev_frame_num_offset= h->frame_num_offset;
        h->prev_frame_num= h->frame_num;
        h->outputed_poc = h->next_outputed_poc;
    }

    if (avctx->hwaccel) {
        if (avctx->hwaccel->end_frame(avctx) < 0)
            av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
    }

    if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice=0;

    return err;
}
02571 
02575 static void clone_slice(H264Context *dst, H264Context *src)
02576 {
02577     memcpy(dst->block_offset,     src->block_offset, sizeof(dst->block_offset));
02578     dst->s.current_picture_ptr  = src->s.current_picture_ptr;
02579     dst->s.current_picture      = src->s.current_picture;
02580     dst->s.linesize             = src->s.linesize;
02581     dst->s.uvlinesize           = src->s.uvlinesize;
02582     dst->s.first_field          = src->s.first_field;
02583 
02584     dst->prev_poc_msb           = src->prev_poc_msb;
02585     dst->prev_poc_lsb           = src->prev_poc_lsb;
02586     dst->prev_frame_num_offset  = src->prev_frame_num_offset;
02587     dst->prev_frame_num         = src->prev_frame_num;
02588     dst->short_ref_count        = src->short_ref_count;
02589 
02590     memcpy(dst->short_ref,        src->short_ref,        sizeof(dst->short_ref));
02591     memcpy(dst->long_ref,         src->long_ref,         sizeof(dst->long_ref));
02592     memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
02593     memcpy(dst->ref_list,         src->ref_list,         sizeof(dst->ref_list));
02594 
02595     memcpy(dst->dequant4_coeff,   src->dequant4_coeff,   sizeof(src->dequant4_coeff));
02596     memcpy(dst->dequant8_coeff,   src->dequant8_coeff,   sizeof(src->dequant8_coeff));
02597 }
02598 
02606 int ff_h264_get_profile(SPS *sps)
02607 {
02608     int profile = sps->profile_idc;
02609 
02610     switch(sps->profile_idc) {
02611     case FF_PROFILE_H264_BASELINE:
02612         // constraint_set1_flag set to 1
02613         profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
02614         break;
02615     case FF_PROFILE_H264_HIGH_10:
02616     case FF_PROFILE_H264_HIGH_422:
02617     case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
02618         // constraint_set3_flag set to 1
02619         profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
02620         break;
02621     }
02622 
02623     return profile;
02624 }
02625 
02635 static int decode_slice_header(H264Context *h, H264Context *h0){
02636     MpegEncContext * const s = &h->s;
02637     MpegEncContext * const s0 = &h0->s;
02638     unsigned int first_mb_in_slice;
02639     unsigned int pps_id;
02640     int num_ref_idx_active_override_flag;
02641     unsigned int slice_type, tmp, i, j;
02642     int default_ref_list_done = 0;
02643     int last_pic_structure, last_pic_dropable;
02644 
02645     /* FIXME: 2tap qpel isn't implemented for high bit depth. */
02646     if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){
02647         s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
02648         s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
02649     }else{
02650         s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
02651         s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
02652     }
02653 
02654     first_mb_in_slice= get_ue_golomb(&s->gb);
02655 
02656     if(first_mb_in_slice == 0){ //FIXME better field boundary detection
02657         if(h0->current_slice && FIELD_PICTURE){
02658             field_end(h, 1);
02659         }
02660 
02661         h0->current_slice = 0;
02662         if (!s0->first_field) {
02663             if (s->current_picture_ptr && !s->dropable &&
02664                 s->current_picture_ptr->owner2 == s) {
02665                 ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
02666                                           s->picture_structure == PICT_BOTTOM_FIELD);
02667             }
02668             s->current_picture_ptr = NULL;
02669         }
02670     }
02671 
02672     slice_type= get_ue_golomb_31(&s->gb);
02673     if(slice_type > 9){
02674         av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
02675         return -1;
02676     }
02677     if(slice_type > 4){
02678         slice_type -= 5;
02679         h->slice_type_fixed=1;
02680     }else
02681         h->slice_type_fixed=0;
02682 
02683     slice_type= golomb_to_pict_type[ slice_type ];
02684     if (slice_type == AV_PICTURE_TYPE_I
02685         || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
02686         default_ref_list_done = 1;
02687     }
02688     h->slice_type= slice_type;
02689     h->slice_type_nos= slice_type & 3;
02690 
02691     s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
02692 
02693     pps_id= get_ue_golomb(&s->gb);
02694     if(pps_id>=MAX_PPS_COUNT){
02695         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
02696         return -1;
02697     }
02698     if(!h0->pps_buffers[pps_id]) {
02699         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
02700         return -1;
02701     }
02702     h->pps= *h0->pps_buffers[pps_id];
02703 
02704     if(!h0->sps_buffers[h->pps.sps_id]) {
02705         av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
02706         return -1;
02707     }
02708     h->sps = *h0->sps_buffers[h->pps.sps_id];
02709 
02710     s->avctx->profile = ff_h264_get_profile(&h->sps);
02711     s->avctx->level   = h->sps.level_idc;
02712     s->avctx->refs    = h->sps.ref_frame_count;
02713 
02714     s->mb_width= h->sps.mb_width;
02715     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
02716 
02717     h->b_stride=  s->mb_width*4;
02718 
02719     s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p
02720 
02721     s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);
02722     if(h->sps.frame_mbs_only_flag)
02723         s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02724     else
02725         s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1);
02726 
02727     if (FFALIGN(s->avctx->width,  16) == s->width &&
02728         FFALIGN(s->avctx->height, 16) == s->height) {
02729         s->width  = s->avctx->width;
02730         s->height = s->avctx->height;
02731     }
02732 
02733     if (s->context_initialized
02734         && (   s->width != s->avctx->width || s->height != s->avctx->height
02735             || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
02736         if(h != h0 || (HAVE_THREADS && h->s.avctx->active_thread_type & FF_THREAD_FRAME)) {
02737             av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);
02738             return AVERROR_PATCHWELCOME;   // width / height changed during parallelized decoding
02739         }
02740         free_tables(h, 0);
02741         flush_dpb(s->avctx);
02742         MPV_common_end(s);
02743     }
02744     if (!s->context_initialized) {
02745         if (h != h0) {
02746             av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");
02747             return -1;
02748         }
02749 
02750         avcodec_set_dimensions(s->avctx, s->width, s->height);
02751         s->avctx->sample_aspect_ratio= h->sps.sar;
02752         av_assert0(s->avctx->sample_aspect_ratio.den);
02753 
02754         if(h->sps.video_signal_type_present_flag){
02755             s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
02756             if(h->sps.colour_description_present_flag){
02757                 s->avctx->color_primaries = h->sps.color_primaries;
02758                 s->avctx->color_trc       = h->sps.color_trc;
02759                 s->avctx->colorspace      = h->sps.colorspace;
02760             }
02761         }
02762 
02763         if(h->sps.timing_info_present_flag){
02764             int64_t den= h->sps.time_scale;
02765             if(h->x264_build < 44U)
02766                 den *= 2;
02767             av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
02768                       h->sps.num_units_in_tick, den, 1<<30);
02769         }
02770 
02771         switch (h->sps.bit_depth_luma) {
02772             case 9 :
02773                 if (CHROMA444) {
02774                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02775                         s->avctx->pix_fmt = PIX_FMT_GBRP9;
02776                     } else
02777                         s->avctx->pix_fmt = PIX_FMT_YUV444P9;
02778                 } else if (CHROMA422)
02779                     s->avctx->pix_fmt = PIX_FMT_YUV422P9;
02780                 else
02781                     s->avctx->pix_fmt = PIX_FMT_YUV420P9;
02782                 break;
02783             case 10 :
02784                 if (CHROMA444) {
02785                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02786                         s->avctx->pix_fmt = PIX_FMT_GBRP10;
02787                     } else
02788                         s->avctx->pix_fmt = PIX_FMT_YUV444P10;
02789                 } else if (CHROMA422)
02790                     s->avctx->pix_fmt = PIX_FMT_YUV422P10;
02791                 else
02792                     s->avctx->pix_fmt = PIX_FMT_YUV420P10;
02793                 break;
02794             case 8:
02795                 if (CHROMA444){
02796                     if (s->avctx->colorspace == AVCOL_SPC_RGB) {
02797                         s->avctx->pix_fmt = PIX_FMT_GBRP;
02798                     } else
02799                         s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;
02800                 } else if (CHROMA422) {
02801                     s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P;
02802                 }else{
02803                     s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
02804                                                              s->avctx->codec->pix_fmts ?
02805                                                              s->avctx->codec->pix_fmts :
02806                                                              s->avctx->color_range == AVCOL_RANGE_JPEG ?
02807                                                              hwaccel_pixfmt_list_h264_jpeg_420 :
02808                                                              ff_hwaccel_pixfmt_list_420);
02809                 }
02810                 break;
02811             default:
02812                 av_log(s->avctx, AV_LOG_ERROR,
02813                        "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
02814                 return AVERROR_INVALIDDATA;
02815         }
02816 
02817         s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
02818 
02819         if (MPV_common_init(s) < 0) {
02820             av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");
02821             return -1;
02822         }
02823         s->first_field = 0;
02824         h->prev_interlaced_frame = 1;
02825 
02826         init_scan_tables(h);
02827         if (ff_h264_alloc_tables(h) < 0) {
02828             av_log(h->s.avctx, AV_LOG_ERROR, "Could not allocate memory for h264\n");
02829             return AVERROR(ENOMEM);
02830         }
02831 
02832         if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {
02833             if (context_init(h) < 0) {
02834                 av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02835                 return -1;
02836             }
02837         } else {
02838             for(i = 1; i < s->slice_context_count; i++) {
02839                 H264Context *c;
02840                 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
02841                 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
02842                 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
02843                 c->h264dsp = h->h264dsp;
02844                 c->sps = h->sps;
02845                 c->pps = h->pps;
02846                 c->pixel_shift = h->pixel_shift;
02847                 init_scan_tables(c);
02848                 clone_tables(c, h, i);
02849             }
02850 
02851             for(i = 0; i < s->slice_context_count; i++)
02852                 if (context_init(h->thread_context[i]) < 0) {
02853                     av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");
02854                     return -1;
02855                 }
02856         }
02857     }
02858 
02859     if(h == h0 && h->dequant_coeff_pps != pps_id){
02860         h->dequant_coeff_pps = pps_id;
02861         init_dequant_tables(h);
02862     }
02863 
02864     h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
02865 
02866     h->mb_mbaff = 0;
02867     h->mb_aff_frame = 0;
02868     last_pic_structure = s0->picture_structure;
02869     last_pic_dropable  = s0->dropable;
02870     s->dropable        = h->nal_ref_idc == 0;
02871     if(h->sps.frame_mbs_only_flag){
02872         s->picture_structure= PICT_FRAME;
02873     }else{
02874         if(get_bits1(&s->gb)) { //field_pic_flag
02875             s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
02876         } else {
02877             s->picture_structure= PICT_FRAME;
02878             h->mb_aff_frame = h->sps.mb_aff;
02879         }
02880     }
02881     h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
02882 
02883     if (h0->current_slice != 0) {
02884         if (last_pic_structure != s->picture_structure ||
02885             last_pic_dropable  != s->dropable) {
02886             av_log(h->s.avctx, AV_LOG_ERROR,
02887                    "Changing field mode (%d -> %d) between slices is not allowed\n",
02888                    last_pic_structure, s->picture_structure);
02889             s->picture_structure = last_pic_structure;
02890             s->dropable          = last_pic_dropable;
02891             return AVERROR_INVALIDDATA;
02892         } else if (!s->current_picture_ptr) {
02893             av_log(s->avctx, AV_LOG_ERROR,
02894                    "unset current_picture_ptr on %d. slice\n",
02895                    h0->current_slice + 1);
02896             return AVERROR_INVALIDDATA;
02897         }
02898     } else {
02899         /* Shorten frame num gaps so we don't have to allocate reference
02900          * frames just to throw them away */
02901         if (h->frame_num != h->prev_frame_num) {
02902             int unwrap_prev_frame_num = h->prev_frame_num;
02903             int max_frame_num         = 1 << h->sps.log2_max_frame_num;
02904 
02905             if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;
02906 
02907             if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {
02908                 unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;
02909                 if (unwrap_prev_frame_num < 0)
02910                     unwrap_prev_frame_num += max_frame_num;
02911 
02912                 h->prev_frame_num = unwrap_prev_frame_num;
02913             }
02914         }
02915 
02916         /* See if we have a decoded first field looking for a pair...
02917          * Here, we're using that to see if we should mark previously
02918          * decode frames as "finished".
02919          * We have to do that before the "dummy" in-between frame allocation,
02920          * since that can modify s->current_picture_ptr. */
02921         if (s0->first_field) {
02922             assert(s0->current_picture_ptr);
02923             assert(s0->current_picture_ptr->f.data[0]);
02924             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
02925 
02926             /* Mark old field/frame as completed */
02927             if (!last_pic_dropable && s0->current_picture_ptr->owner2 == s0) {
02928                 ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02929                                           last_pic_structure == PICT_BOTTOM_FIELD);
02930             }
02931 
02932             /* figure out if we have a complementary field pair */
02933             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
02934                 /* Previous field is unmatched. Don't display it, but let it
02935                  * remain for reference if marked as such. */
02936                 if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02937                     ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02938                                               last_pic_structure == PICT_TOP_FIELD);
02939                 }
02940             } else {
02941                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
02942                     /* This and previous field were reference, but had
02943                      * different frame_nums. Consider this field first in
02944                      * pair. Throw away previous field except for reference
02945                      * purposes. */
02946                     if (!last_pic_dropable && last_pic_structure != PICT_FRAME) {
02947                         ff_thread_report_progress(&s0->current_picture_ptr->f, INT_MAX,
02948                                                   last_pic_structure == PICT_TOP_FIELD);
02949                     }
02950                 } else {
02951                     /* Second field in complementary pair */
02952                     if (!((last_pic_structure   == PICT_TOP_FIELD &&
02953                            s->picture_structure == PICT_BOTTOM_FIELD) ||
02954                           (last_pic_structure   == PICT_BOTTOM_FIELD &&
02955                            s->picture_structure == PICT_TOP_FIELD))) {
02956                         av_log(s->avctx, AV_LOG_ERROR,
02957                                "Invalid field mode combination %d/%d\n",
02958                                last_pic_structure, s->picture_structure);
02959                         s->picture_structure = last_pic_structure;
02960                         s->dropable          = last_pic_dropable;
02961                         return AVERROR_INVALIDDATA;
02962                     } else if (last_pic_dropable != s->dropable) {
02963                         av_log(s->avctx, AV_LOG_ERROR,
02964                                "Cannot combine reference and non-reference fields in the same frame\n");
02965                         av_log_ask_for_sample(s->avctx, NULL);
02966                         s->picture_structure = last_pic_structure;
02967                         s->dropable          = last_pic_dropable;
02968                         return AVERROR_INVALIDDATA;
02969                     }
02970 
02971                     /* Take ownership of this buffer. Note that if another thread owned
02972                      * the first field of this buffer, we're not operating on that pointer,
02973                      * so the original thread is still responsible for reporting progress
02974                      * on that first field (or if that was us, we just did that above).
02975                      * By taking ownership, we assign responsibility to ourselves to
02976                      * report progress on the second field. */
02977                     s0->current_picture_ptr->owner2 = s0;
02978                 }
02979             }
02980         }
02981 
02982         while (h->frame_num != h->prev_frame_num &&
02983                h->frame_num != (h->prev_frame_num + 1) % (1 << h->sps.log2_max_frame_num)) {
02984             Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
02985             av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
02986             if (ff_h264_frame_start(h) < 0)
02987                 return -1;
02988             h->prev_frame_num++;
02989             h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
02990             s->current_picture_ptr->frame_num= h->prev_frame_num;
02991             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);
02992             ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);
02993             ff_generate_sliding_window_mmcos(h);
02994             if (ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index) < 0 &&
02995                 (s->avctx->err_recognition & AV_EF_EXPLODE))
02996                 return AVERROR_INVALIDDATA;
02997             /* Error concealment: if a ref is missing, copy the previous ref in its place.
02998              * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
02999              * about there being no actual duplicates.
03000              * FIXME: this doesn't copy padding for out-of-frame motion vectors.  Given we're
03001              * concealing a lost frame, this probably isn't noticeable by comparison, but it should
03002              * be fixed. */
03003             if (h->short_ref_count) {
03004                 if (prev) {
03005                     av_image_copy(h->short_ref[0]->f.data, h->short_ref[0]->f.linesize,
03006                                   (const uint8_t**)prev->f.data, prev->f.linesize,
03007                                   s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
03008                     h->short_ref[0]->poc = prev->poc+2;
03009                 }
03010                 h->short_ref[0]->frame_num = h->prev_frame_num;
03011             }
03012         }
03013 
03014         /* See if we have a decoded first field looking for a pair...
03015          * We're using that to see whether to continue decoding in that
03016          * frame, or to allocate a new one. */
03017         if (s0->first_field) {
03018             assert(s0->current_picture_ptr);
03019             assert(s0->current_picture_ptr->f.data[0]);
03020             assert(s0->current_picture_ptr->f.reference != DELAYED_PIC_REF);
03021 
03022             /* figure out if we have a complementary field pair */
03023             if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
03024                 /*
03025                  * Previous field is unmatched. Don't display it, but let it
03026                  * remain for reference if marked as such.
03027                  */
03028                 s0->current_picture_ptr = NULL;
03029                 s0->first_field = FIELD_PICTURE;
03030 
03031             } else {
03032                 if (s0->current_picture_ptr->frame_num != h->frame_num) {
03033                     /* This and the previous field had different frame_nums.
03034                      * Consider this field first in pair. Throw away previous
03035                      * one except for reference purposes. */
03036                     s0->first_field         = 1;
03037                     s0->current_picture_ptr = NULL;
03038 
03039                 } else {
03040                     /* Second field in complementary pair */
03041                     s0->first_field = 0;
03042                 }
03043             }
03044 
03045         } else {
03046             /* Frame or first field in a potentially complementary pair */
03047             assert(!s0->current_picture_ptr);
03048             s0->first_field = FIELD_PICTURE;
03049         }
03050 
03051         if(!FIELD_PICTURE || s0->first_field) {
03052             if (ff_h264_frame_start(h) < 0) {
03053                 s0->first_field = 0;
03054                 return -1;
03055             }
03056         } else {
03057             ff_release_unused_pictures(s, 0);
03058         }
03059     }
03060     if(h != h0)
03061         clone_slice(h, h0);
03062 
03063     s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
03064 
03065     assert(s->mb_num == s->mb_width * s->mb_height);
03066     if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
03067        first_mb_in_slice                    >= s->mb_num){
03068         av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
03069         return -1;
03070     }
03071     s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
03072     s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
03073     if (s->picture_structure == PICT_BOTTOM_FIELD)
03074         s->resync_mb_y = s->mb_y = s->mb_y + 1;
03075     assert(s->mb_y < s->mb_height);
03076 
03077     if(s->picture_structure==PICT_FRAME){
03078         h->curr_pic_num=   h->frame_num;
03079         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
03080     }else{
03081         h->curr_pic_num= 2*h->frame_num + 1;
03082         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
03083     }
03084 
03085     if(h->nal_unit_type == NAL_IDR_SLICE){
03086         get_ue_golomb(&s->gb); /* idr_pic_id */
03087     }
03088 
03089     if(h->sps.poc_type==0){
03090         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
03091 
03092         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
03093             h->delta_poc_bottom= get_se_golomb(&s->gb);
03094         }
03095     }
03096 
03097     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
03098         h->delta_poc[0]= get_se_golomb(&s->gb);
03099 
03100         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
03101             h->delta_poc[1]= get_se_golomb(&s->gb);
03102     }
03103 
03104     init_poc(h);
03105 
03106     if(h->pps.redundant_pic_cnt_present){
03107         h->redundant_pic_count= get_ue_golomb(&s->gb);
03108     }
03109 
03110     //set defaults, might be overridden a few lines later
03111     h->ref_count[0]= h->pps.ref_count[0];
03112     h->ref_count[1]= h->pps.ref_count[1];
03113 
03114     if(h->slice_type_nos != AV_PICTURE_TYPE_I){
03115         int max_refs = s->picture_structure == PICT_FRAME ? 16 : 32;
03116 
03117         if(h->slice_type_nos == AV_PICTURE_TYPE_B){
03118             h->direct_spatial_mv_pred= get_bits1(&s->gb);
03119         }
03120         num_ref_idx_active_override_flag= get_bits1(&s->gb);
03121 
03122         if(num_ref_idx_active_override_flag){
03123             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
03124             if (h->ref_count[0] < 1)
03125                 return AVERROR_INVALIDDATA;
03126             if (h->slice_type_nos == AV_PICTURE_TYPE_B) {
03127                 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
03128                 if (h->ref_count[1] < 1)
03129                     return AVERROR_INVALIDDATA;
03130             }
03131         }
03132 
03133         if (h->ref_count[0] > max_refs || h->ref_count[1] > max_refs) {
03134             av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
03135             h->ref_count[0] = h->ref_count[1] = 1;
03136             return AVERROR_INVALIDDATA;
03137         }
03138 
03139         if(h->slice_type_nos == AV_PICTURE_TYPE_B)
03140             h->list_count= 2;
03141         else
03142             h->list_count= 1;
03143     }else
03144         h->list_count= 0;
03145 
03146     if(!default_ref_list_done){
03147         ff_h264_fill_default_ref_list(h);
03148     }
03149 
03150     if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0) {
03151         h->ref_count[1]= h->ref_count[0]= 0;
03152         return -1;
03153     }
03154 
03155     if(h->slice_type_nos!=AV_PICTURE_TYPE_I){
03156         s->last_picture_ptr= &h->ref_list[0][0];
03157         ff_copy_picture(&s->last_picture, s->last_picture_ptr);
03158     }
03159     if(h->slice_type_nos==AV_PICTURE_TYPE_B){
03160         s->next_picture_ptr= &h->ref_list[1][0];
03161         ff_copy_picture(&s->next_picture, s->next_picture_ptr);
03162     }
03163 
03164     if(   (h->pps.weighted_pred          && h->slice_type_nos == AV_PICTURE_TYPE_P )
03165        ||  (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )
03166         pred_weight_table(h);
03167     else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03168         implicit_weight_table(h, -1);
03169     }else {
03170         h->use_weight = 0;
03171         for (i = 0; i < 2; i++) {
03172             h->luma_weight_flag[i]   = 0;
03173             h->chroma_weight_flag[i] = 0;
03174         }
03175     }
03176 
03177     if(h->nal_ref_idc && ff_h264_decode_ref_pic_marking(h0, &s->gb) < 0 &&
03178        (s->avctx->err_recognition & AV_EF_EXPLODE))
03179         return AVERROR_INVALIDDATA;
03180 
03181     if(FRAME_MBAFF){
03182         ff_h264_fill_mbaff_ref_list(h);
03183 
03184         if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){
03185             implicit_weight_table(h, 0);
03186             implicit_weight_table(h, 1);
03187         }
03188     }
03189 
03190     if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)
03191         ff_h264_direct_dist_scale_factor(h);
03192     ff_h264_direct_ref_list_init(h);
03193 
03194     if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){
03195         tmp = get_ue_golomb_31(&s->gb);
03196         if(tmp > 2){
03197             av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
03198             return -1;
03199         }
03200         h->cabac_init_idc= tmp;
03201     }
03202 
03203     h->last_qscale_diff = 0;
03204     tmp = h->pps.init_qp + get_se_golomb(&s->gb);
03205     if(tmp>51+6*(h->sps.bit_depth_luma-8)){
03206         av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
03207         return -1;
03208     }
03209     s->qscale= tmp;
03210     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03211     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03212     //FIXME qscale / qp ... stuff
03213     if(h->slice_type == AV_PICTURE_TYPE_SP){
03214         get_bits1(&s->gb); /* sp_for_switch_flag */
03215     }
03216     if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){
03217         get_se_golomb(&s->gb); /* slice_qs_delta */
03218     }
03219 
03220     h->deblocking_filter = 1;
03221     h->slice_alpha_c0_offset = 52;
03222     h->slice_beta_offset = 52;
03223     if( h->pps.deblocking_filter_parameters_present ) {
03224         tmp= get_ue_golomb_31(&s->gb);
03225         if(tmp > 2){
03226             av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
03227             return -1;
03228         }
03229         h->deblocking_filter= tmp;
03230         if(h->deblocking_filter < 2)
03231             h->deblocking_filter^= 1; // 1<->0
03232 
03233         if( h->deblocking_filter ) {
03234             h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
03235             h->slice_beta_offset     += get_se_golomb(&s->gb) << 1;
03236             if(   h->slice_alpha_c0_offset > 104U
03237                || h->slice_beta_offset     > 104U){
03238                 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
03239                 return -1;
03240             }
03241         }
03242     }
03243 
03244     if(   s->avctx->skip_loop_filter >= AVDISCARD_ALL
03245        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)
03246        ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR  && h->slice_type_nos == AV_PICTURE_TYPE_B)
03247        ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
03248         h->deblocking_filter= 0;
03249 
03250     if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
03251         if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
03252             /* Cheat slightly for speed:
03253                Do not bother to deblock across slices. */
03254             h->deblocking_filter = 2;
03255         } else {
03256             h0->max_contexts = 1;
03257             if(!h0->single_decode_warning) {
03258                 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
03259                 h0->single_decode_warning = 1;
03260             }
03261             if (h != h0) {
03262                 av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");
03263                 return 1;
03264             }
03265         }
03266     }
03267     h->qp_thresh = 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset)
03268                  - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1])
03269                  + 6 * (h->sps.bit_depth_luma - 8);
03270 
03271 #if 0 //FMO
03272     if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
03273         slice_group_change_cycle= get_bits(&s->gb, ?);
03274 #endif
03275 
03276     h0->last_slice_type = slice_type;
03277     h->slice_num = ++h0->current_slice;
03278     if(h->slice_num >= MAX_SLICES){
03279         av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
03280     }
03281 
03282     for(j=0; j<2; j++){
03283         int id_list[16];
03284         int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
03285         for(i=0; i<16; i++){
03286             id_list[i]= 60;
03287             if (h->ref_list[j][i].f.data[0]) {
03288                 int k;
03289                 uint8_t *base = h->ref_list[j][i].f.base[0];
03290                 for(k=0; k<h->short_ref_count; k++)
03291                     if (h->short_ref[k]->f.base[0] == base) {
03292                         id_list[i]= k;
03293                         break;
03294                     }
03295                 for(k=0; k<h->long_ref_count; k++)
03296                     if (h->long_ref[k] && h->long_ref[k]->f.base[0] == base) {
03297                         id_list[i]= h->short_ref_count + k;
03298                         break;
03299                     }
03300             }
03301         }
03302 
03303         ref2frm[0]=
03304         ref2frm[1]= -1;
03305         for(i=0; i<16; i++)
03306             ref2frm[i+2]= 4*id_list[i]
03307                           + (h->ref_list[j][i].f.reference & 3);
03308         ref2frm[18+0]=
03309         ref2frm[18+1]= -1;
03310         for(i=16; i<48; i++)
03311             ref2frm[i+4]= 4*id_list[(i-16)>>1]
03312                           + (h->ref_list[j][i].f.reference & 3);
03313     }
03314 
03315     //FIXME: fix draw_edges+PAFF+frame threads
03316     h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;
03317     h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
03318 
03319     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
03320         av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
03321                h->slice_num,
03322                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
03323                first_mb_in_slice,
03324                av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
03325                pps_id, h->frame_num,
03326                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
03327                h->ref_count[0], h->ref_count[1],
03328                s->qscale,
03329                h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
03330                h->use_weight,
03331                h->use_weight==1 && h->use_weight_chroma ? "c" : "",
03332                h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
03333                );
03334     }
03335 
03336     return 0;
03337 }
03338 
03339 int ff_h264_get_slice_type(const H264Context *h)
03340 {
03341     switch (h->slice_type) {
03342     case AV_PICTURE_TYPE_P:  return 0;
03343     case AV_PICTURE_TYPE_B:  return 1;
03344     case AV_PICTURE_TYPE_I:  return 2;
03345     case AV_PICTURE_TYPE_SP: return 3;
03346     case AV_PICTURE_TYPE_SI: return 4;
03347     default:         return -1;
03348     }
03349 }
03350 
/* Fill the motion-vector and reference caches used by the loop filter for
 * one prediction list.  Copies mv/ref data for the current macroblock and
 * its top/left neighbours out of the current picture into h->mv_cache /
 * h->ref_cache (scan8 layout), translating slice-local reference indices
 * to global frame ids through the per-slice ref2frm tables. */
static av_always_inline void fill_filter_caches_inter(H264Context *h, MpegEncContext * const s, int mb_type, int top_xy,
                                                      int left_xy[LEFT_MBS], int top_type, int left_type[LEFT_MBS], int mb_xy, int list)
{
    int b_stride = h->b_stride;
    /* destination caches, addressed in the 8-wide scan8 layout; row -1 holds
     * the top neighbour, column -1 the left neighbour */
    int16_t (*mv_dst)[2] = &h->mv_cache[list][scan8[0]];
    int8_t *ref_cache = &h->ref_cache[list][scan8[0]];
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        /* top neighbour: import its bottom MV row / bottom 8x8 refs */
        if(USES_LIST(top_type, list)){
            const int b_xy= h->mb2b_xy[top_xy] + 3*b_stride; // bottom 4x4 MV row of the top MB
            const int b8_xy= 4*top_xy + 2;                   // bottom 8x8 pair of the top MB
            /* ref2frm of the slice the top MB belongs to; +20 selects the
             * MBAFF (field-aware) part of the table, +2 the frame part */
            int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
            AV_COPY128(mv_dst - 1*8, s->current_picture.f.motion_val[list][b_xy + 0]);
            ref_cache[0 - 1*8]=
            ref_cache[1 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 0]];
            ref_cache[2 - 1*8]=
            ref_cache[3 - 1*8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 1]];
        }else{
            /* top MB does not use this list: zero the MVs, mark refs unused */
            AV_ZERO128(mv_dst - 1*8);
            AV_WN32A(&ref_cache[0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        }

        /* left neighbour: only imported when current and left MB share the
         * same field/frame (interlaced) coding, i.e. the XOR of the two
         * mb_type interlace bits is clear */
        if(!IS_INTERLACED(mb_type^left_type[LTOP])){
            if(USES_LIST(left_type[LTOP], list)){
                const int b_xy= h->mb2b_xy[left_xy[LTOP]] + 3; // rightmost 4x4 MV column of the left MB
                const int b8_xy= 4*left_xy[LTOP] + 1;          // right 8x8 column of the left MB
                int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[LTOP]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
                AV_COPY32(mv_dst - 1 +  0, s->current_picture.f.motion_val[list][b_xy + b_stride*0]);
                AV_COPY32(mv_dst - 1 +  8, s->current_picture.f.motion_val[list][b_xy + b_stride*1]);
                AV_COPY32(mv_dst - 1 + 16, s->current_picture.f.motion_val[list][b_xy + b_stride*2]);
                AV_COPY32(mv_dst - 1 + 24, s->current_picture.f.motion_val[list][b_xy + b_stride*3]);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*0]];
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= ref2frm[list][s->current_picture.f.ref_index[list][b8_xy + 2*1]];
            }else{
                /* left MB does not use this list */
                AV_ZERO32(mv_dst - 1 + 0);
                AV_ZERO32(mv_dst - 1 + 8);
                AV_ZERO32(mv_dst - 1 +16);
                AV_ZERO32(mv_dst - 1 +24);
                ref_cache[-1 +  0]=
                ref_cache[-1 +  8]=
                ref_cache[-1 + 16]=
                ref_cache[-1 + 24]= LIST_NOT_USED;
            }
        }
    }

    /* current MB does not use this list: clear its whole 4x4 cache area
     * and stop — neighbour rows were already handled above */
    if(!USES_LIST(mb_type, list)){
        fill_rectangle(mv_dst, 4, 4, 8, pack16to32(0,0), 4);
        AV_WN32A(&ref_cache[0*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[2*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        AV_WN32A(&ref_cache[3*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
        return;
    }

    {
        /* translate the current MB's four 8x8 ref indices and replicate each
         * into a 2x2 cache area (two bytes duplicated per 32-bit store) */
        int8_t *ref = &s->current_picture.f.ref_index[list][4*mb_xy];
        int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
        uint32_t ref01 = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
        uint32_t ref23 = (pack16to32(ref2frm[list][ref[2]],ref2frm[list][ref[3]])&0x00FF00FF)*0x0101;
        AV_WN32A(&ref_cache[0*8], ref01);
        AV_WN32A(&ref_cache[1*8], ref01);
        AV_WN32A(&ref_cache[2*8], ref23);
        AV_WN32A(&ref_cache[3*8], ref23);
    }

    {
        /* copy the current MB's 4x4 grid of motion vectors, one row of four
         * MVs (128 bits) at a time */
        int16_t (*mv_src)[2] = &s->current_picture.f.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
        AV_COPY128(mv_dst + 8*0, mv_src + 0*b_stride);
        AV_COPY128(mv_dst + 8*1, mv_src + 1*b_stride);
        AV_COPY128(mv_dst + 8*2, mv_src + 2*b_stride);
        AV_COPY128(mv_dst + 8*3, mv_src + 3*b_stride);
    }
}
03426 
03431 static int fill_filter_caches(H264Context *h, int mb_type){
03432     MpegEncContext * const s = &h->s;
03433     const int mb_xy= h->mb_xy;
03434     int top_xy, left_xy[LEFT_MBS];
03435     int top_type, left_type[LEFT_MBS];
03436     uint8_t *nnz;
03437     uint8_t *nnz_cache;
03438 
03439     top_xy     = mb_xy  - (s->mb_stride << MB_FIELD);
03440 
03441     /* Wow, what a mess, why didn't they simplify the interlacing & intra
03442      * stuff, I can't imagine that these complex rules are worth it. */
03443 
03444     left_xy[LBOT] = left_xy[LTOP] = mb_xy-1;
03445     if(FRAME_MBAFF){
03446         const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.f.mb_type[mb_xy - 1]);
03447         const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
03448         if(s->mb_y&1){
03449             if (left_mb_field_flag != curr_mb_field_flag) {
03450                 left_xy[LTOP] -= s->mb_stride;
03451             }
03452         }else{
03453             if(curr_mb_field_flag){
03454                 top_xy += s->mb_stride & (((s->current_picture.f.mb_type[top_xy] >> 7) & 1) - 1);
03455             }
03456             if (left_mb_field_flag != curr_mb_field_flag) {
03457                 left_xy[LBOT] += s->mb_stride;
03458             }
03459         }
03460     }
03461 
03462     h->top_mb_xy = top_xy;
03463     h->left_mb_xy[LTOP] = left_xy[LTOP];
03464     h->left_mb_xy[LBOT] = left_xy[LBOT];
03465     {
03466         //for sufficiently low qp, filtering wouldn't do anything
03467         //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
03468         int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
03469         int qp = s->current_picture.f.qscale_table[mb_xy];
03470         if(qp <= qp_thresh
03471            && (left_xy[LTOP] < 0 || ((qp + s->current_picture.f.qscale_table[left_xy[LTOP]] + 1) >> 1) <= qp_thresh)
03472            && (top_xy        < 0 || ((qp + s->current_picture.f.qscale_table[top_xy       ] + 1) >> 1) <= qp_thresh)) {
03473             if(!FRAME_MBAFF)
03474                 return 1;
03475             if ((left_xy[LTOP] < 0            || ((qp + s->current_picture.f.qscale_table[left_xy[LBOT]        ] + 1) >> 1) <= qp_thresh) &&
03476                 (top_xy        < s->mb_stride || ((qp + s->current_picture.f.qscale_table[top_xy - s->mb_stride] + 1) >> 1) <= qp_thresh))
03477                 return 1;
03478         }
03479     }
03480 
03481     top_type        = s->current_picture.f.mb_type[top_xy];
03482     left_type[LTOP] = s->current_picture.f.mb_type[left_xy[LTOP]];
03483     left_type[LBOT] = s->current_picture.f.mb_type[left_xy[LBOT]];
03484     if(h->deblocking_filter == 2){
03485         if(h->slice_table[top_xy       ] != h->slice_num) top_type= 0;
03486         if(h->slice_table[left_xy[LBOT]] != h->slice_num) left_type[LTOP]= left_type[LBOT]= 0;
03487     }else{
03488         if(h->slice_table[top_xy       ] == 0xFFFF) top_type= 0;
03489         if(h->slice_table[left_xy[LBOT]] == 0xFFFF) left_type[LTOP]= left_type[LBOT] =0;
03490     }
03491     h->top_type       = top_type;
03492     h->left_type[LTOP]= left_type[LTOP];
03493     h->left_type[LBOT]= left_type[LBOT];
03494 
03495     if(IS_INTRA(mb_type))
03496         return 0;
03497 
03498     fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 0);
03499     if(h->list_count == 2)
03500         fill_filter_caches_inter(h, s, mb_type, top_xy, left_xy, top_type, left_type, mb_xy, 1);
03501 
03502     nnz = h->non_zero_count[mb_xy];
03503     nnz_cache = h->non_zero_count_cache;
03504     AV_COPY32(&nnz_cache[4+8*1], &nnz[ 0]);
03505     AV_COPY32(&nnz_cache[4+8*2], &nnz[ 4]);
03506     AV_COPY32(&nnz_cache[4+8*3], &nnz[ 8]);
03507     AV_COPY32(&nnz_cache[4+8*4], &nnz[12]);
03508     h->cbp= h->cbp_table[mb_xy];
03509 
03510     if(top_type){
03511         nnz = h->non_zero_count[top_xy];
03512         AV_COPY32(&nnz_cache[4+8*0], &nnz[3*4]);
03513     }
03514 
03515     if(left_type[LTOP]){
03516         nnz = h->non_zero_count[left_xy[LTOP]];
03517         nnz_cache[3+8*1]= nnz[3+0*4];
03518         nnz_cache[3+8*2]= nnz[3+1*4];
03519         nnz_cache[3+8*3]= nnz[3+2*4];
03520         nnz_cache[3+8*4]= nnz[3+3*4];
03521     }
03522 
03523     // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
03524     if(!CABAC && h->pps.transform_8x8_mode){
03525         if(IS_8x8DCT(top_type)){
03526             nnz_cache[4+8*0]=
03527             nnz_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;
03528             nnz_cache[6+8*0]=
03529             nnz_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;
03530         }
03531         if(IS_8x8DCT(left_type[LTOP])){
03532             nnz_cache[3+8*1]=
03533             nnz_cache[3+8*2]= (h->cbp_table[left_xy[LTOP]]&0x2000) >> 12; //FIXME check MBAFF
03534         }
03535         if(IS_8x8DCT(left_type[LBOT])){
03536             nnz_cache[3+8*3]=
03537             nnz_cache[3+8*4]= (h->cbp_table[left_xy[LBOT]]&0x8000) >> 12; //FIXME check MBAFF
03538         }
03539 
03540         if(IS_8x8DCT(mb_type)){
03541             nnz_cache[scan8[0   ]]= nnz_cache[scan8[1   ]]=
03542             nnz_cache[scan8[2   ]]= nnz_cache[scan8[3   ]]= (h->cbp & 0x1000) >> 12;
03543 
03544             nnz_cache[scan8[0+ 4]]= nnz_cache[scan8[1+ 4]]=
03545             nnz_cache[scan8[2+ 4]]= nnz_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;
03546 
03547             nnz_cache[scan8[0+ 8]]= nnz_cache[scan8[1+ 8]]=
03548             nnz_cache[scan8[2+ 8]]= nnz_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;
03549 
03550             nnz_cache[scan8[0+12]]= nnz_cache[scan8[1+12]]=
03551             nnz_cache[scan8[2+12]]= nnz_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;
03552         }
03553     }
03554 
03555     return 0;
03556 }
03557 
03558 static void loop_filter(H264Context *h, int start_x, int end_x){
03559     MpegEncContext * const s = &h->s;
03560     uint8_t  *dest_y, *dest_cb, *dest_cr;
03561     int linesize, uvlinesize, mb_x, mb_y;
03562     const int end_mb_y= s->mb_y + FRAME_MBAFF;
03563     const int old_slice_type= h->slice_type;
03564     const int pixel_shift = h->pixel_shift;
03565     const int block_h = 16 >> s->chroma_y_shift;
03566 
03567     if(h->deblocking_filter) {
03568         for(mb_x= start_x; mb_x<end_x; mb_x++){
03569             for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
03570                 int mb_xy, mb_type;
03571                 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
03572                 h->slice_num= h->slice_table[mb_xy];
03573                 mb_type = s->current_picture.f.mb_type[mb_xy];
03574                 h->list_count= h->list_counts[mb_xy];
03575 
03576                 if(FRAME_MBAFF)
03577                     h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
03578 
03579                 s->mb_x= mb_x;
03580                 s->mb_y= mb_y;
03581                 dest_y  = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
03582                 dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
03583                 dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h;
03584                     //FIXME simplify above
03585 
03586                 if (MB_FIELD) {
03587                     linesize   = h->mb_linesize   = s->linesize * 2;
03588                     uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
03589                     if(mb_y&1){ //FIXME move out of this function?
03590                         dest_y -= s->linesize*15;
03591                         dest_cb-= s->uvlinesize * (block_h - 1);
03592                         dest_cr-= s->uvlinesize * (block_h - 1);
03593                     }
03594                 } else {
03595                     linesize   = h->mb_linesize   = s->linesize;
03596                     uvlinesize = h->mb_uvlinesize = s->uvlinesize;
03597                 }
03598                 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
03599                 if(fill_filter_caches(h, mb_type))
03600                     continue;
03601                 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]);
03602                 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.f.qscale_table[mb_xy]);
03603 
03604                 if (FRAME_MBAFF) {
03605                     ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
03606                 } else {
03607                     ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
03608                 }
03609             }
03610         }
03611     }
03612     h->slice_type= old_slice_type;
03613     s->mb_x= end_x;
03614     s->mb_y= end_mb_y - FRAME_MBAFF;
03615     h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
03616     h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
03617 }
03618 
03619 static void predict_field_decoding_flag(H264Context *h){
03620     MpegEncContext * const s = &h->s;
03621     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
03622     int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
03623                 ? s->current_picture.f.mb_type[mb_xy - 1]
03624                 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
03625                 ? s->current_picture.f.mb_type[mb_xy - s->mb_stride]
03626                 : 0;
03627     h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
03628 }
03629 
03633 static void decode_finish_row(H264Context *h){
03634     MpegEncContext * const s = &h->s;
03635     int top = 16*(s->mb_y >> FIELD_PICTURE);
03636     int height = 16 << FRAME_MBAFF;
03637     int deblock_border = (16 + 4) << FRAME_MBAFF;
03638     int pic_height = 16*s->mb_height >> FIELD_PICTURE;
03639 
03640     if (h->deblocking_filter) {
03641         if((top + height) >= pic_height)
03642             height += deblock_border;
03643 
03644         top -= deblock_border;
03645     }
03646 
03647     if (top >= pic_height || (top + height) < h->emu_edge_height)
03648         return;
03649 
03650     height = FFMIN(height, pic_height - top);
03651     if (top < h->emu_edge_height) {
03652         height = top+height;
03653         top = 0;
03654     }
03655 
03656     ff_draw_horiz_band(s, top, height);
03657 
03658     if (s->dropable) return;
03659 
03660     ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,
03661                              s->picture_structure==PICT_BOTTOM_FIELD);
03662 }
03663 
/**
 * Decode all macroblocks of one slice; worker entry point for
 * avctx->execute() in sliced-threading mode.
 *
 * @param avctx codec context
 * @param arg   pointer to an H264Context* (double indirection as required
 *              by the execute() callback convention)
 * @return 0 on success, -1 on decode error
 */
static int decode_slice(struct AVCodecContext *avctx, void *arg){
    H264Context *h = *(void**)arg;
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (ER_AC_END|ER_AC_ERROR) : 0x7F;
    int lf_x_start = s->mb_x;  // first MB column not yet loop-filtered

    s->mb_skip_run= -1;

    h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
                    (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

    if( h->pps.cabac ) {
        /* realign */
        align_get_bits( &s->gb );

        /* init cabac */
        ff_init_cabac_states( &h->cabac);
        ff_init_cabac_decoder( &h->cabac,
                               s->gb.buffer + get_bits_count(&s->gb)/8,
                               (get_bits_left(&s->gb) + 7)/8);

        ff_h264_init_cabac_states(h);

        /* CABAC main loop: decode one MB (or an MB pair with MBAFF) per
         * iteration until the end-of-slice flag or an error */
        for(;;){
//START_TIMER
            int ret = ff_h264_decode_mb_cabac(h);
            int eos;
//STOP_TIMER("decode_mb_cabac")

            if(ret>=0) ff_h264_hl_decode_mb(h);

            if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;

                ret = ff_h264_decode_mb_cabac(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }
            eos = get_cabac_terminate( &h->cabac );

            /* truncated-stream workaround: treat a small overread as a
             * normal slice end instead of an error */
            if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);
                return 0;
            }
            if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            /* advance; at the end of an MB row, filter it and start the next */
            if( ++s->mb_x >= s->mb_width ) {
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
            }

            if( eos || s->mb_y >= s->mb_height ) {
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);
                return 0;
            }
        }

    } else {
        /* CAVLC main loop: same structure as the CABAC loop above, but the
         * slice end is detected by bitstream exhaustion instead of an
         * explicit terminator */
        for(;;){
            int ret = ff_h264_decode_mb_cavlc(h);

            if(ret>=0) ff_h264_hl_decode_mb(h);

            if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
                s->mb_y++;
                ret = ff_h264_decode_mb_cavlc(h);

                if(ret>=0) ff_h264_hl_decode_mb(h);
                s->mb_y--;
            }

            if(ret<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);
                return -1;
            }

            if(++s->mb_x >= s->mb_width){
                loop_filter(h, lf_x_start, s->mb_x);
                s->mb_x = lf_x_start = 0;
                decode_finish_row(h);
                ++s->mb_y;
                if(FIELD_OR_MBAFF_PICTURE) {
                    ++s->mb_y;
                    if(FRAME_MBAFF && s->mb_y < s->mb_height)
                        predict_field_decoding_flag(h);
                }
                if(s->mb_y >= s->mb_height){
                    tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                    if (get_bits_left(&s->gb) == 0) {
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);

                        return 0;
                    } else {
                        /* bits left over after the last MB: corrupt stream */
                        ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y,
                                        s->mb_x - 1, s->mb_y,
                                        ER_MB_END & part_mask);
                        return -1;
                    }
                }
            }

            if (get_bits_left(&s->gb) <= 0 && s->mb_skip_run <= 0){
                tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
                if (get_bits_left(&s->gb) == 0) {
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, ER_MB_END&part_mask);
                    if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

                    return 0;
                }else{
                    /* ran out of bits mid-picture: report the error region */
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, ER_MB_ERROR&part_mask);

                    return -1;
                }
            }
        }
    }
}
03798 
03805 static int execute_decode_slices(H264Context *h, int context_count){
03806     MpegEncContext * const s = &h->s;
03807     AVCodecContext * const avctx= s->avctx;
03808     H264Context *hx;
03809     int i;
03810 
03811     if (s->avctx->hwaccel || s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03812         return 0;
03813     if(context_count == 1) {
03814         return decode_slice(avctx, &h);
03815     } else {
03816         for(i = 1; i < context_count; i++) {
03817             hx = h->thread_context[i];
03818             hx->s.err_recognition = avctx->err_recognition;
03819             hx->s.error_count = 0;
03820         }
03821 
03822         avctx->execute(avctx, decode_slice,
03823                        h->thread_context, NULL, context_count, sizeof(void*));
03824 
03825         /* pull back stuff from slices to master context */
03826         hx = h->thread_context[context_count - 1];
03827         s->mb_x = hx->s.mb_x;
03828         s->mb_y = hx->s.mb_y;
03829         s->dropable = hx->s.dropable;
03830         s->picture_structure = hx->s.picture_structure;
03831         for(i = 1; i < context_count; i++)
03832             h->s.error_count += h->thread_context[i]->s.error_count;
03833     }
03834 
03835     return 0;
03836 }
03837 
03838 
03839 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
03840     MpegEncContext * const s = &h->s;
03841     AVCodecContext * const avctx= s->avctx;
03842     H264Context *hx; 
03843     int buf_index;
03844     int context_count;
03845     int next_avc;
03846     int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
03847     int nals_needed=0; 
03848     int nal_index;
03849 
03850     h->max_contexts = s->slice_context_count;
03851     if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
03852         h->current_slice = 0;
03853         if (!s->first_field)
03854             s->current_picture_ptr= NULL;
03855         ff_h264_reset_sei(h);
03856     }
03857 
03858     for(;pass <= 1;pass++){
03859         buf_index = 0;
03860         context_count = 0;
03861         next_avc = h->is_avc ? 0 : buf_size;
03862         nal_index = 0;
03863     for(;;){
03864         int consumed;
03865         int dst_length;
03866         int bit_length;
03867         const uint8_t *ptr;
03868         int i, nalsize = 0;
03869         int err;
03870 
03871         if(buf_index >= next_avc) {
03872             if (buf_index >= buf_size - h->nal_length_size) break;
03873             nalsize = 0;
03874             for(i = 0; i < h->nal_length_size; i++)
03875                 nalsize = (nalsize << 8) | buf[buf_index++];
03876             if(nalsize <= 0 || nalsize > buf_size - buf_index){
03877                 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
03878                 break;
03879             }
03880             next_avc= buf_index + nalsize;
03881         } else {
03882             // start code prefix search
03883             for(; buf_index + 3 < next_avc; buf_index++){
03884                 // This should always succeed in the first iteration.
03885                 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
03886                     break;
03887             }
03888 
03889 
03890             if (buf_index + 3 >= buf_size) {
03891                 buf_index = buf_size;
03892                 break;
03893             }
03894 
03895             buf_index+=3;
03896             if(buf_index >= next_avc) continue;
03897         }
03898 
03899         hx = h->thread_context[context_count];
03900 
03901         ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
03902         if (ptr == NULL || dst_length < 0) {
03903             buf_index = -1;
03904             goto end;
03905         }
03906         i= buf_index + consumed;
03907         if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
03908            buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
03909             s->workaround_bugs |= FF_BUG_TRUNCATED;
03910 
03911         if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
03912         while(ptr[dst_length - 1] == 0 && dst_length > 0)
03913             dst_length--;
03914         }
03915         bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
03916 
03917         if(s->avctx->debug&FF_DEBUG_STARTCODE){
03918             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
03919         }
03920 
03921         if (h->is_avc && (nalsize != consumed) && nalsize){
03922             av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
03923         }
03924 
03925         buf_index += consumed;
03926         nal_index++;
03927 
03928         if(pass == 0) {
03929             // packets can sometimes contain multiple PPS/SPS
03930             // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
03931             // if so, when frame threading we can't start the next thread until we've read all of them
03932             switch (hx->nal_unit_type) {
03933                 case NAL_SPS:
03934                 case NAL_PPS:
03935                     nals_needed = nal_index;
03936                     break;
03937                 case NAL_IDR_SLICE:
03938                 case NAL_SLICE:
03939                     init_get_bits(&hx->s.gb, ptr, bit_length);
03940                     if (!get_ue_golomb(&hx->s.gb))
03941                         nals_needed = nal_index;
03942             }
03943             continue;
03944         }
03945 
03946         //FIXME do not discard SEI id
03947         if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0)
03948             continue;
03949 
03950       again:
03951         err = 0;
03952         switch(hx->nal_unit_type){
03953         case NAL_IDR_SLICE:
03954             if (h->nal_unit_type != NAL_IDR_SLICE) {
03955                 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
03956                 buf_index = -1;
03957                 goto end;
03958             }
03959             idr(h); // FIXME ensure we don't lose some frames if there is reordering
03960         case NAL_SLICE:
03961             init_get_bits(&hx->s.gb, ptr, bit_length);
03962             hx->intra_gb_ptr=
03963             hx->inter_gb_ptr= &hx->s.gb;
03964             hx->s.data_partitioning = 0;
03965 
03966             if((err = decode_slice_header(hx, h)))
03967                break;
03968 
03969             s->current_picture_ptr->f.key_frame |=
03970                     (hx->nal_unit_type == NAL_IDR_SLICE) ||
03971                     (h->sei_recovery_frame_cnt >= 0);
03972 
03973             if (h->current_slice == 1) {
03974                 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
03975                     decode_postinit(h, nal_index >= nals_needed);
03976                 }
03977 
03978                 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
03979                     return -1;
03980                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
03981                     ff_vdpau_h264_picture_start(s);
03982             }
03983 
03984             if(hx->redundant_pic_count==0
03985                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
03986                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
03987                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
03988                && avctx->skip_frame < AVDISCARD_ALL){
03989                 if(avctx->hwaccel) {
03990                     if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
03991                         return -1;
03992                 }else
03993                 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
03994                     static const uint8_t start_code[] = {0x00, 0x00, 0x01};
03995                     ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
03996                     ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
03997                 }else
03998                     context_count++;
03999             }
04000             break;
04001         case NAL_DPA:
04002             init_get_bits(&hx->s.gb, ptr, bit_length);
04003             hx->intra_gb_ptr=
04004             hx->inter_gb_ptr= NULL;
04005 
04006             if ((err = decode_slice_header(hx, h)) < 0)
04007                 break;
04008 
04009             hx->s.data_partitioning = 1;
04010 
04011             break;
04012         case NAL_DPB:
04013             init_get_bits(&hx->intra_gb, ptr, bit_length);
04014             hx->intra_gb_ptr= &hx->intra_gb;
04015             break;
04016         case NAL_DPC:
04017             init_get_bits(&hx->inter_gb, ptr, bit_length);
04018             hx->inter_gb_ptr= &hx->inter_gb;
04019 
04020             if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
04021                && s->current_picture_ptr
04022                && s->context_initialized
04023                && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
04024                && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
04025                && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
04026                && avctx->skip_frame < AVDISCARD_ALL)
04027                 context_count++;
04028             break;
04029         case NAL_SEI:
04030             init_get_bits(&s->gb, ptr, bit_length);
04031             ff_h264_decode_sei(h);
04032             break;
04033         case NAL_SPS:
04034             init_get_bits(&s->gb, ptr, bit_length);
04035             if (ff_h264_decode_seq_parameter_set(h) < 0 &&
04036                 h->is_avc && (nalsize != consumed) && nalsize) {
04037                 av_log(h->s.avctx, AV_LOG_DEBUG, "SPS decoding failure, "
04038                        "try parsing the coomplete NAL\n");
04039                 init_get_bits(&s->gb, buf + buf_index + 1 - consumed,
04040                               8 * (nalsize - 1));
04041                 ff_h264_decode_seq_parameter_set(h);
04042             }
04043 
04044             if (s->flags & CODEC_FLAG_LOW_DELAY ||
04045                 (h->sps.bitstream_restriction_flag &&
04046                  !h->sps.num_reorder_frames)) {
04047                 if (s->avctx->has_b_frames > 1 || h->delayed_pic[0])
04048                     av_log(avctx, AV_LOG_WARNING, "Delayed frames seen "
04049                            "reenabling low delay requires a codec "
04050                            "flush.\n");
04051                 else
04052                     s->low_delay = 1;
04053             }
04054 
04055             if(avctx->has_b_frames < 2)
04056                 avctx->has_b_frames= !s->low_delay;
04057 
04058             if (h->sps.bit_depth_luma != h->sps.bit_depth_chroma) {
04059                 av_log_missing_feature(s->avctx,
04060                     "Different bit depth between chroma and luma", 1);
04061                 return AVERROR_PATCHWELCOME;
04062             }
04063 
04064             if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma ||
04065                 h->cur_chroma_format_idc != h->sps.chroma_format_idc) {
04066                 if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
04067                     avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
04068                     h->cur_chroma_format_idc = h->sps.chroma_format_idc;
04069                     h->pixel_shift = h->sps.bit_depth_luma > 8;
04070 
04071                     ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04072                     ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc);
04073                     s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
04074                     dsputil_init(&s->dsp, s->avctx);
04075                 } else {
04076                     av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
04077                     buf_index = -1;
04078                     goto end;
04079                 }
04080             }
04081             break;
04082         case NAL_PPS:
04083             init_get_bits(&s->gb, ptr, bit_length);
04084 
04085             ff_h264_decode_picture_parameter_set(h, bit_length);
04086 
04087             break;
04088         case NAL_AUD:
04089         case NAL_END_SEQUENCE:
04090         case NAL_END_STREAM:
04091         case NAL_FILLER_DATA:
04092         case NAL_SPS_EXT:
04093         case NAL_AUXILIARY_SLICE:
04094             break;
04095         default:
04096             av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
04097         }
04098 
04099         if(context_count == h->max_contexts) {
04100             execute_decode_slices(h, context_count);
04101             context_count = 0;
04102         }
04103 
04104         if (err < 0)
04105             av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
04106         else if(err == 1) {
04107             /* Slice could not be decoded in parallel mode, copy down
04108              * NAL unit stuff to context 0 and restart. Note that
04109              * rbsp_buffer is not transferred, but since we no longer
04110              * run in parallel mode this should not be an issue. */
04111             h->nal_unit_type = hx->nal_unit_type;
04112             h->nal_ref_idc   = hx->nal_ref_idc;
04113             hx = h;
04114             goto again;
04115         }
04116     }
04117     }
04118     if(context_count)
04119         execute_decode_slices(h, context_count);
04120 
04121 end:
04122     /* clean up */
04123     if (s->current_picture_ptr && s->current_picture_ptr->owner2 == s &&
04124         !s->dropable) {
04125         ff_thread_report_progress(&s->current_picture_ptr->f, INT_MAX,
04126                                   s->picture_structure == PICT_BOTTOM_FIELD);
04127     }
04128 
04129     return buf_index;
04130 }
04131 
04135 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
04136         if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
04137         if(pos+10>buf_size) pos=buf_size; // oops ;)
04138 
04139         return pos;
04140 }
04141 
/**
 * AVCodec.decode entry point: decode one packet of H.264 data.
 *
 * An empty packet (buf_size == 0) flushes: the oldest picture still held
 * in the delayed-output queue is returned.
 *
 * @param avctx     codec context
 * @param data      output AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is returned, 0 otherwise
 * @param avpkt     input packet
 * @return number of bytes consumed, or a negative value on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size = avpkt->size;
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index = 0;

    s->flags= avctx->flags;
    s->flags2= avctx->flags2;

   /* end of stream, output what is still in the buffers */
 out:
    if (buf_size == 0) {
        Picture *out;
        int i, out_idx;

        s->current_picture_ptr = NULL;

//FIXME factorize this with the output code below
        /* pick the delayed picture with the smallest poc, stopping at a
         * key frame or mmco reset */
        out = h->delayed_pic[0];
        out_idx = 0;
        for (i = 1; h->delayed_pic[i] && !h->delayed_pic[i]->f.key_frame && !h->delayed_pic[i]->mmco_reset; i++)
            if(h->delayed_pic[i]->poc < out->poc){
                out = h->delayed_pic[i];
                out_idx = i;
            }

        /* remove the chosen picture from the delayed queue */
        for(i=out_idx; h->delayed_pic[i]; i++)
            h->delayed_pic[i] = h->delayed_pic[i+1];

        if(out){
            *data_size = sizeof(AVFrame);
            *pict= *(AVFrame*)out;
        }

        return buf_index;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    /* end-of-sequence NAL with no picture: flush via the path above */
    if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
        buf_size = 0;
        goto out;
    }

    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
        if (avctx->skip_frame >= AVDISCARD_NONREF)
            return 0;
        av_log(avctx, AV_LOG_ERROR, "no frame!\n");
        return -1;
    }

    /* in non-chunked mode, or once all MB rows of the picture are decoded,
     * finish the field/frame and possibly output a picture */
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

        if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

        field_end(h, 0);

        if (!h->next_output_pic) {
            /* Wait for second field. */
            *data_size = 0;

        } else {
            *data_size = sizeof(AVFrame);
            *pict = *(AVFrame*)h->next_output_pic;
        }
    }

    assert(pict->data[0] || !*data_size);
    ff_print_debug_info(s, pict);
//printf("out %d\n", (int)pict->data[0]);

    return get_consumed_bytes(s, buf_index, buf_size);
}
#if 0
/* NOTE(review): dead code, compiled out with #if 0 — kept for reference. */
/**
 * Fill h->mb_avail[] with the availability of the neighbouring
 * macroblocks (same-slice check against h->slice_table), in the order:
 * top-left, top, top-right, left, and two fixed entries.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
04241 
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Standalone self-test for the Exp-Golomb bitstream helpers
 * (set_ue_golomb/get_ue_golomb and set_se_golomb/get_se_golomb).
 *
 * Writes COUNT codes into a scratch buffer, reads them back and reports
 * any value that does not round-trip; START_TIMER/STOP_TIMER wrap each
 * call so the test doubles as a micro-benchmark.
 *
 * @return 0 (mismatches are only printed, not treated as fatal)
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    avctx.av_class = avcodec_get_class();
    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s = show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        /* The signed test encodes i - COUNT/2, so that is the expected value;
         * the old diagnostic wrongly printed i as "should be". */
        if(j != i - COUNT/2){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
04309 
04310 
04311 av_cold void ff_h264_free_context(H264Context *h)
04312 {
04313     int i;
04314 
04315     free_tables(h, 1); //FIXME cleanup init stuff perhaps
04316 
04317     for(i = 0; i < MAX_SPS_COUNT; i++)
04318         av_freep(h->sps_buffers + i);
04319 
04320     for(i = 0; i < MAX_PPS_COUNT; i++)
04321         av_freep(h->pps_buffers + i);
04322 }
04323 
04324 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
04325 {
04326     H264Context *h = avctx->priv_data;
04327     MpegEncContext *s = &h->s;
04328 
04329     ff_h264_free_context(h);
04330 
04331     MPV_common_end(s);
04332 
04333 //    memset(h, 0, sizeof(H264Context));
04334 
04335     return 0;
04336 }
04337 
/* Human-readable names for the H.264 profiles this decoder can report,
 * terminated by the FF_PROFILE_UNKNOWN sentinel; exported to callers via
 * the AVCodec.profiles field below. */
static const AVProfile profiles[] = {
    { FF_PROFILE_H264_BASELINE,             "Baseline"              },
    { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline"  },
    { FF_PROFILE_H264_MAIN,                 "Main"                  },
    { FF_PROFILE_H264_EXTENDED,             "Extended"              },
    { FF_PROFILE_H264_HIGH,                 "High"                  },
    { FF_PROFILE_H264_HIGH_10,              "High 10"               },
    { FF_PROFILE_H264_HIGH_10_INTRA,        "High 10 Intra"         },
    { FF_PROFILE_H264_HIGH_422,             "High 4:2:2"            },
    { FF_PROFILE_H264_HIGH_422_INTRA,       "High 4:2:2 Intra"      },
    { FF_PROFILE_H264_HIGH_444,             "High 4:4:4"            },
    { FF_PROFILE_H264_HIGH_444_PREDICTIVE,  "High 4:4:4 Predictive" },
    { FF_PROFILE_H264_HIGH_444_INTRA,       "High 4:4:4 Intra"      },
    { FF_PROFILE_H264_CAVLC_444,            "CAVLC 4:4:4"           },
    { FF_PROFILE_UNKNOWN },
};
04354 
04355 AVCodec ff_h264_decoder = {
04356     .name           = "h264",
04357     .type           = AVMEDIA_TYPE_VIDEO,
04358     .id             = CODEC_ID_H264,
04359     .priv_data_size = sizeof(H264Context),
04360     .init           = ff_h264_decode_init,
04361     .close          = ff_h264_decode_end,
04362     .decode         = decode_frame,
04363     .capabilities   = /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
04364                       CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,
04365     .flush= flush_dpb,
04366     .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
04367     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
04368     .update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),
04369     .profiles = NULL_IF_CONFIG_SMALL(profiles),
04370 };
04371 
#if CONFIG_H264_VDPAU_DECODER
/* Registration of the VDPAU-accelerated H.264 decoder variant. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush          = flush_dpb,
    .long_name      = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts       = (const enum PixelFormat[]) { PIX_FMT_VDPAU_H264,
                                                   PIX_FMT_NONE },
    .profiles       = NULL_IF_CONFIG_SMALL(profiles),
};
#endif
Generated on Thu Jul 11 2013 15:38:19 for Libav by doxygen 1.7.1