libpostproc/postprocess_template.c File Reference

mmx/mmx2/3dnow postprocess code. More...

#include "libavutil/x86_cpu.h"

Go to the source code of this file.

Defines

#define ALIGN_MASK   "$-8"
#define REAL_PAVGB(a, b)   "pavgusb " #a ", " #b " \n\t"
#define PAVGB(a, b)   REAL_PAVGB(a,b)
#define PMINUB(b, a, t)
#define PMAXUB(a, b)
#define REAL_FIND_MIN_MAX(addr)
#define FIND_MIN_MAX(addr)   REAL_FIND_MIN_MAX(addr)
#define REAL_DERING_CORE(dst, src, ppsx, psx, sx, pplx, plx, lx, t0, t1)
#define DERING_CORE(dst, src, ppsx, psx, sx, pplx, plx, lx, t0, t1)   REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
#define REAL_DEINT_CUBIC(a, b, c, d, e)
#define DEINT_CUBIC(a, b, c, d, e)   REAL_DEINT_CUBIC(a,b,c,d,e)
#define REAL_DEINT_FF(a, b, c, d)
#define DEINT_FF(a, b, c, d)   REAL_DEINT_FF(a,b,c,d)
#define REAL_DEINT_L5(t1, t2, a, b, c)
#define DEINT_L5(t1, t2, a, b, c)   REAL_DEINT_L5(t1,t2,a,b,c)
#define REAL_MEDIAN(a, b, c)
#define MEDIAN(a, b, c)   REAL_MEDIAN(a,b,c)
#define FAST_L2_DIFF
#define REAL_L2_DIFF_CORE(a, b)
#define L2_DIFF_CORE(a, b)   REAL_L2_DIFF_CORE(a, b)
#define NEXT
#define PREV
#define REAL_SCALED_CPY(src1, src2, dst1, dst2)
#define SCALED_CPY(src1, src2, dst1, dst2)   REAL_SCALED_CPY(src1, src2, dst1, dst2)
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
#define SIMPLE_CPY(src1, src2, dst1, dst2)   REAL_SIMPLE_CPY(src1, src2, dst1, dst2)

Functions

static int vertClassify_3DNow (uint8_t src[], int stride, PPContext *c)
 Check if the middle 8x8 Block in the given 8x16 block is flat.
static void doVertLowPass_3DNow (uint8_t *src, int stride, PPContext *c)
 Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16.
static void vertX1Filter_3DNow (uint8_t *src, int stride, PPContext *co)
 Experimental Filter 1. Will not damage linear gradients. Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-tap filter. Can only smooth blocks at the expected locations (it cannot smooth them if they have moved). The MMX2 version does correct clipping; the C version does not.
static void doVertDefFilter_3DNow (uint8_t src[], int stride, PPContext *c)
static void dering_3DNow (uint8_t src[], int stride, PPContext *c)
static void deInterlaceInterpolateLinear_3DNow (uint8_t src[], int stride)
 Deinterlace the given block by linearly interpolating every second line.
static void deInterlaceInterpolateCubic_3DNow (uint8_t src[], int stride)
 Deinterlace the given block by cubic interpolating every second line.
static void deInterlaceFF_3DNow (uint8_t src[], int stride, uint8_t *tmp)
 Deinterlace the given block by filtering every second line with a (-1 4 2 4 -1) filter.
static void deInterlaceL5_3DNow (uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
 Deinterlace the given block by filtering every line with a (-1 2 6 2 -1) filter.
static void deInterlaceBlendLinear_3DNow (uint8_t src[], int stride, uint8_t *tmp)
 Deinterlace the given block by filtering all lines with a (1 2 1) filter.
static void deInterlaceMedian_3DNow (uint8_t src[], int stride)
 Deinterlace the given block by applying a median filter to every second line.
static void transpose1_3DNow (uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
 Transpose and shift the given 8x8 Block into dst1 and dst2.
static void transpose2_3DNow (uint8_t *dst, int dstStride, uint8_t *src)
 Transpose the given 8x8 block.
static void tempNoiseReducer_3DNow (uint8_t *src, int stride, uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
static av_always_inline void do_a_deblock_3DNow (uint8_t *src, int step, int stride, PPContext *c)
 accurate deblock filter
static void postProcess_3DNow (const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height, const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
 Filter array of bytes (Y or U or V values).
static void blockCopy_3DNow (uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale)
 Copy a block from src to dst and fix the black level.
static void duplicate_3DNow (uint8_t src[], int stride)
 Duplicate the given 8 src pixels ? times upward.

Detailed Description

mmx/mmx2/3dnow postprocess code.

Definition in file postprocess_template.c.


Define Documentation

#define ALIGN_MASK   "$-8"

Definition at line 28 of file postprocess_template.c.

Referenced by dering_3DNow(), do_a_deblock_3DNow(), and doVertDefFilter_3DNow().

#define DEINT_CUBIC (   a,
  b,
  c,
  d,
  e 
)    REAL_DEINT_CUBIC(a,b,c,d,e)
#define DEINT_FF (   a,
  b,
  c,
  d 
)    REAL_DEINT_FF(a,b,c,d)

Referenced by deInterlaceFF_3DNow().

#define DEINT_L5 (   t1,
  t2,
  a,
  b,
  c 
)    REAL_DEINT_L5(t1,t2,a,b,c)

Referenced by deInterlaceL5_3DNow().

#define DERING_CORE (   dst,
  src,
  ppsx,
  psx,
  sx,
  pplx,
  plx,
  lx,
  t0,
  t1 
)    REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)

Referenced by dering_3DNow().

#define FAST_L2_DIFF
#define FIND_MIN_MAX (   addr  )     REAL_FIND_MIN_MAX(addr)

Referenced by dering_3DNow().

#define L2_DIFF_CORE (   a,
  b 
)    REAL_L2_DIFF_CORE(a, b)

Referenced by tempNoiseReducer_3DNow().

#define MEDIAN (   a,
  b,
  c 
)    REAL_MEDIAN(a,b,c)

Referenced by deInterlaceMedian_3DNow().

#define NEXT
Value:
"movq (%0), %%mm2                       \n\t"\
            "movq (%0), %%mm3                       \n\t"\
            "add %1, %0                             \n\t"\
            "punpcklbw %%mm4, %%mm2                 \n\t"\
            "punpckhbw %%mm4, %%mm3                 \n\t"\
            "paddw %%mm2, %%mm0                     \n\t"\
            "paddw %%mm3, %%mm1                     \n\t"

Referenced by do_a_deblock_3DNow().

#define PMAXUB (   a,
  b 
)
Value:
"psubusb " #a ", " #b " \n\t"\
    "paddb " #a ", " #b " \n\t"

Definition at line 54 of file postprocess_template.c.

Referenced by do_a_deblock_3DNow(), doVertDefFilter_3DNow(), and vertClassify_3DNow().

#define PMINUB (   b,
  a,
  t 
)
Value:
"movq " #a ", " #t " \n\t"\
    "psubusb " #b ", " #t " \n\t"\
    "psubb " #t ", " #a " \n\t"

Definition at line 45 of file postprocess_template.c.

Referenced by do_a_deblock_3DNow(), doVertDefFilter_3DNow(), and vertClassify_3DNow().

#define PREV
Value:
"movq (%0), %%mm2                       \n\t"\
            "movq (%0), %%mm3                       \n\t"\
            "add %1, %0                             \n\t"\
            "punpcklbw %%mm4, %%mm2                 \n\t"\
            "punpckhbw %%mm4, %%mm3                 \n\t"\
            "psubw %%mm2, %%mm0                     \n\t"\
            "psubw %%mm3, %%mm1                     \n\t"

Referenced by do_a_deblock_3DNow().

#define REAL_DEINT_CUBIC (   a,
  b,
  c,
  d,
  e 
)
Value:
"movq " #a ", %%mm0                     \n\t"\
        "movq " #b ", %%mm1                     \n\t"\
        "movq " #d ", %%mm2                     \n\t"\
        "movq " #e ", %%mm3                     \n\t"\
        PAVGB(%%mm2, %%mm1)                             /* (b+d) /2 */\
        PAVGB(%%mm3, %%mm0)                             /* (a+e) /2 */\
        "movq %%mm0, %%mm2                      \n\t"\
        "punpcklbw %%mm7, %%mm0                 \n\t"\
        "punpckhbw %%mm7, %%mm2                 \n\t"\
        "movq %%mm1, %%mm3                      \n\t"\
        "punpcklbw %%mm7, %%mm1                 \n\t"\
        "punpckhbw %%mm7, %%mm3                 \n\t"\
        "psubw %%mm1, %%mm0                     \n\t"   /* L(a+e - (b+d))/2 */\
        "psubw %%mm3, %%mm2                     \n\t"   /* H(a+e - (b+d))/2 */\
        "psraw $3, %%mm0                        \n\t"   /* L(a+e - (b+d))/16 */\
        "psraw $3, %%mm2                        \n\t"   /* H(a+e - (b+d))/16 */\
        "psubw %%mm0, %%mm1                     \n\t"   /* L(9b + 9d - a - e)/16 */\
        "psubw %%mm2, %%mm3                     \n\t"   /* H(9b + 9d - a - e)/16 */\
        "packuswb %%mm3, %%mm1                  \n\t"\
        "movq %%mm1, " #c "                     \n\t"
#define REAL_DEINT_FF (   a,
  b,
  c,
  d 
)
Value:
"movq " #a ", %%mm1                     \n\t"\
        "movq " #b ", %%mm2                     \n\t"\
        "movq " #c ", %%mm3                     \n\t"\
        "movq " #d ", %%mm4                     \n\t"\
        PAVGB(%%mm3, %%mm1)                          \
        PAVGB(%%mm4, %%mm0)                          \
        "movq %%mm0, %%mm3                      \n\t"\
        "punpcklbw %%mm7, %%mm0                 \n\t"\
        "punpckhbw %%mm7, %%mm3                 \n\t"\
        "movq %%mm1, %%mm4                      \n\t"\
        "punpcklbw %%mm7, %%mm1                 \n\t"\
        "punpckhbw %%mm7, %%mm4                 \n\t"\
        "psllw $2, %%mm1                        \n\t"\
        "psllw $2, %%mm4                        \n\t"\
        "psubw %%mm0, %%mm1                     \n\t"\
        "psubw %%mm3, %%mm4                     \n\t"\
        "movq %%mm2, %%mm5                      \n\t"\
        "movq %%mm2, %%mm0                      \n\t"\
        "punpcklbw %%mm7, %%mm2                 \n\t"\
        "punpckhbw %%mm7, %%mm5                 \n\t"\
        "paddw %%mm2, %%mm1                     \n\t"\
        "paddw %%mm5, %%mm4                     \n\t"\
        "psraw $2, %%mm1                        \n\t"\
        "psraw $2, %%mm4                        \n\t"\
        "packuswb %%mm4, %%mm1                  \n\t"\
        "movq %%mm1, " #b "                     \n\t"\
#define REAL_DEINT_L5 (   t1,
  t2,
  a,
  b,
  c 
)
#define REAL_DERING_CORE (   dst,
  src,
  ppsx,
  psx,
  sx,
  pplx,
  plx,
  lx,
  t0,
  t1 
)
#define REAL_FIND_MIN_MAX (   addr  ) 
Value:
"movq " #addr ", %%mm0                  \n\t"\
        "movq %%mm7, %%mm1                      \n\t"\
        "psubusb %%mm0, %%mm6                   \n\t"\
        "paddb %%mm0, %%mm6                     \n\t"\
        "psubusb %%mm0, %%mm1                   \n\t"\
        "psubb %%mm1, %%mm7                     \n\t"
#define REAL_L2_DIFF_CORE (   a,
  b 
)
Value:
"movq " #a ", %%mm5                     \n\t"\
        "movq " #b ", %%mm2                     \n\t"\
        "pxor %%mm7, %%mm2                      \n\t"\
        PAVGB(%%mm2, %%mm5)\
        "paddb %%mm6, %%mm5                     \n\t"\
        "movq %%mm5, %%mm2                      \n\t"\
        "psllw $8, %%mm5                        \n\t"\
        "pmaddwd %%mm5, %%mm5                   \n\t"\
        "pmaddwd %%mm2, %%mm2                   \n\t"\
        "paddd %%mm2, %%mm5                     \n\t"\
        "psrld $14, %%mm5                       \n\t"\
        "paddd %%mm5, %%mm0                     \n\t"
#define REAL_MEDIAN (   a,
  b,
  c 
)
Value:
"movq " #a ", %%mm0                     \n\t"\
        "movq " #b ", %%mm2                     \n\t"\
        "movq " #c ", %%mm1                     \n\t"\
        "movq %%mm0, %%mm3                      \n\t"\
        "movq %%mm1, %%mm4                      \n\t"\
        "movq %%mm2, %%mm5                      \n\t"\
        "psubusb %%mm1, %%mm3                   \n\t"\
        "psubusb %%mm2, %%mm4                   \n\t"\
        "psubusb %%mm0, %%mm5                   \n\t"\
        "pcmpeqb %%mm7, %%mm3                   \n\t"\
        "pcmpeqb %%mm7, %%mm4                   \n\t"\
        "pcmpeqb %%mm7, %%mm5                   \n\t"\
        "movq %%mm3, %%mm6                      \n\t"\
        "pxor %%mm4, %%mm3                      \n\t"\
        "pxor %%mm5, %%mm4                      \n\t"\
        "pxor %%mm6, %%mm5                      \n\t"\
        "por %%mm3, %%mm1                       \n\t"\
        "por %%mm4, %%mm2                       \n\t"\
        "por %%mm5, %%mm0                       \n\t"\
        "pand %%mm2, %%mm0                      \n\t"\
        "pand %%mm1, %%mm0                      \n\t"\
        "movq %%mm0, " #b "                     \n\t"
#define REAL_PAVGB (   a,
  b 
)    "pavgusb " #a ", " #b " \n\t"

Definition at line 38 of file postprocess_template.c.

#define REAL_SCALED_CPY (   src1,
  src2,
  dst1,
  dst2 
)
Value:
"movq " #src1 ", %%mm0          \n\t"\
        "movq " #src1 ", %%mm5          \n\t"\
        "punpcklbw %%mm4, %%mm0         \n\t"\
        "punpckhbw %%mm4, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0             \n\t"\
        "psubw %%mm2, %%mm5             \n\t"\
        "movq " #src2 ", %%mm1          \n\t"\
        "psllw $6, %%mm0                \n\t"\
        "psllw $6, %%mm5                \n\t"\
        "pmulhw %%mm3, %%mm0            \n\t"\
        "movq " #src2 ", %%mm6          \n\t"\
        "pmulhw %%mm3, %%mm5            \n\t"\
        "punpcklbw %%mm4, %%mm1         \n\t"\
        "punpckhbw %%mm4, %%mm6         \n\t"\
        "psubw %%mm2, %%mm1             \n\t"\
        "psubw %%mm2, %%mm6             \n\t"\
        "psllw $6, %%mm1                \n\t"\
        "psllw $6, %%mm6                \n\t"\
        "pmulhw %%mm3, %%mm1            \n\t"\
        "pmulhw %%mm3, %%mm6            \n\t"\
        "packuswb %%mm5, %%mm0          \n\t"\
        "packuswb %%mm6, %%mm1          \n\t"\
        "movq %%mm0, " #dst1 "          \n\t"\
        "movq %%mm1, " #dst2 "          \n\t"\
#define REAL_SIMPLE_CPY (   src1,
  src2,
  dst1,
  dst2 
)
Value:
"movq " #src1 ", %%mm0          \n\t"\
        "movq " #src2 ", %%mm1          \n\t"\
        "movq %%mm0, " #dst1 "          \n\t"\
        "movq %%mm1, " #dst2 "          \n\t"\
#define SCALED_CPY (   src1,
  src2,
  dst1,
  dst2 
)    REAL_SCALED_CPY(src1, src2, dst1, dst2)

Referenced by blockCopy_3DNow().

#define SIMPLE_CPY (   src1,
  src2,
  dst1,
  dst2 
)    REAL_SIMPLE_CPY(src1, src2, dst1, dst2)

Referenced by blockCopy_3DNow().


Function Documentation

static void blockCopy_3DNow ( uint8_t  dst[],
int  dstStride,
const uint8_t  src[],
int  srcStride,
int  levelFix,
int64_t *  packedOffsetAndScale 
) [inline, static]

Copy a block from src to dst and fix the black level.

levelFix == 0 -> do not touch the brightness & contrast

Definition at line 3016 of file postprocess_template.c.

static void deInterlaceBlendLinear_3DNow ( uint8_t  src[],
int  stride,
uint8_t *  tmp 
) [inline, static]

Deinterlace the given block by filtering all lines with a (1 2 1) filter.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced. This filter will read lines 4-13 and write lines 4-11.

Definition at line 1695 of file postprocess_template.c.

static void deInterlaceFF_3DNow ( uint8_t  src[],
int  stride,
uint8_t *  tmp 
) [inline, static]

Deinterlace the given block by filtering every second line with a (-1 4 2 4 -1) filter.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced. This filter will read lines 4-13 and write lines 5-11.

Definition at line 1515 of file postprocess_template.c.

static void deInterlaceInterpolateCubic_3DNow ( uint8_t  src[],
int  stride 
) [inline, static]

Deinterlace the given block by cubic interpolating every second line.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced. This filter will read lines 3-15 and write lines 7-13.

Definition at line 1451 of file postprocess_template.c.

static void deInterlaceInterpolateLinear_3DNow ( uint8_t  src[],
int  stride 
) [inline, static]

Deinterlace the given block by linearly interpolating every second line.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced.

Definition at line 1398 of file postprocess_template.c.

static void deInterlaceL5_3DNow ( uint8_t  src[],
int  stride,
uint8_t *  tmp,
uint8_t *  tmp2 
) [inline, static]

Deinterlace the given block by filtering every line with a (-1 2 6 2 -1) filter.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced. This filter will read lines 4-13 and write lines 4-11.

Definition at line 1594 of file postprocess_template.c.

static void deInterlaceMedian_3DNow ( uint8_t  src[],
int  stride 
) [inline, static]

Deinterlace the given block by applying a median filter to every second line.

Will be called for every 8x8 block and can read & write lines 4-15. Lines 0-3 have been passed through the deblock / dering filters already, but can be read, too. Lines 4-12 will be read into the deblocking filter and should be deinterlaced.

Definition at line 1796 of file postprocess_template.c.

static void dering_3DNow ( uint8_t  src[],
int  stride,
PPContext c 
) [inline, static]

Definition at line 1044 of file postprocess_template.c.

static av_always_inline void do_a_deblock_3DNow ( uint8_t *  src,
int  step,
int  stride,
PPContext c 
) [static]

accurate deblock filter

Definition at line 2471 of file postprocess_template.c.

static void doVertDefFilter_3DNow ( uint8_t  src[],
int  stride,
PPContext c 
) [inline, static]

Definition at line 486 of file postprocess_template.c.

static void doVertLowPass_3DNow ( uint8_t *  src,
int  stride,
PPContext c 
) [inline, static]

Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16.

Definition at line 181 of file postprocess_template.c.

static void duplicate_3DNow ( uint8_t  src[],
int  stride 
) [inline, static]

Duplicate the given 8 src pixels ? times upward.

Definition at line 3144 of file postprocess_template.c.

static void postProcess_3DNow ( const uint8_t  src[],
int  srcStride,
uint8_t  dst[],
int  dstStride,
int  width,
int  height,
const QP_STORE_T  QPs[],
int  QPStride,
int  isColor,
PPContext c 
) [static]

Filter array of bytes (Y or U or V values).

Definition at line 3169 of file postprocess_template.c.

Referenced by postProcess().

static void tempNoiseReducer_3DNow ( uint8_t *  src,
int  stride,
uint8_t *  tempBlurred,
uint32_t *  tempBlurredPast,
int *  maxNoise 
) [inline, static]

Definition at line 2082 of file postprocess_template.c.

static void transpose1_3DNow ( uint8_t *  dst1,
uint8_t *  dst2,
uint8_t *  src,
int  srcStride 
) [inline, static]

Transpose and shift the given 8x8 Block into dst1 and dst2.

Definition at line 1917 of file postprocess_template.c.

static void transpose2_3DNow ( uint8_t *  dst,
int  dstStride,
uint8_t *  src 
) [inline, static]

Transpose the given 8x8 block.

Definition at line 2002 of file postprocess_template.c.

static int vertClassify_3DNow ( uint8_t  src[],
int  stride,
PPContext c 
) [inline, static]

Check if the middle 8x8 Block in the given 8x16 block is flat.

Definition at line 64 of file postprocess_template.c.

static void vertX1Filter_3DNow ( uint8_t *  src,
int  stride,
PPContext co 
) [inline, static]

Experimental Filter 1. Will not damage linear gradients. Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-tap filter. Can only smooth blocks at the expected locations (it cannot smooth them if they have moved). The MMX2 version does correct clipping; the C version does not.

Definition at line 360 of file postprocess_template.c.