#include "libavutil/cpu.h"
#include "libavutil/x86_cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/snow.h"
#include "libavcodec/dwt.h"
#include "dsputil_mmx.h"
Go to the source code of this file.
Defines
#define snow_vertical_compose_sse2_load_add(op, r, t0, t1, t2, t3)
#define snow_vertical_compose_sse2_load(r, t0, t1, t2, t3)   snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
#define snow_vertical_compose_sse2_add(r, t0, t1, t2, t3)   snow_vertical_compose_sse2_load_add("paddw",r,t0,t1,t2,t3)
#define snow_vertical_compose_r2r_sub(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_sse2_store(w, s0, s1, s2, s3)
#define snow_vertical_compose_sra(n, t0, t1, t2, t3)
#define snow_vertical_compose_r2r_add(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_r2r_pmulhw(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_sse2_move(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_mmx_load_add(op, r, t0, t1, t2, t3)
#define snow_vertical_compose_mmx_load(r, t0, t1, t2, t3)   snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
#define snow_vertical_compose_mmx_add(r, t0, t1, t2, t3)   snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
#define snow_vertical_compose_mmx_store(w, s0, s1, s2, s3)
#define snow_vertical_compose_mmx_move(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_inner_add_yblock_sse2_header
#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)
#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)
#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset)
#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset)
#define snow_inner_add_yblock_sse2_end_common1
#define snow_inner_add_yblock_sse2_end_common2
#define snow_inner_add_yblock_sse2_end_8
#define snow_inner_add_yblock_sse2_end_16
#define snow_inner_add_yblock_mmx_header
#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)
#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset)
#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)
#define snow_inner_add_yblock_mmx_end(s_step)
Functions
static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width)
static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width)
static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width)
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8)
void ff_dwt_init_x86(DWTContext *c)
#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
    snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
    "paddusw %%mm2, %%mm1 \n\t"\
    "paddusw %%mm6, %%mm5 \n\t"
Definition at line 791 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_mmx(), and inner_add_yblock_bw_8_obmc_16_mmx().
#define snow_inner_add_yblock_mmx_end(s_step) \
    "add $"s_step", %%"REG_S" \n\t"\
    "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
    "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
    "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
    "add %%"REG_c", (%%"REG_a") \n\t"\
    "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\
    "add %%"REG_c", %0 \n\t"\
    "dec %2 \n\t"\
    "jnz 1b \n\t"\
    :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
    :\
    "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
    "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
Definition at line 809 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_mmx(), and inner_add_yblock_bw_8_obmc_16_mmx().
#define snow_inner_add_yblock_mmx_header \
    IDWTELEM * * dst_array = sb->line + src_y;\
    x86_reg tmp;\
    __asm__ volatile(\
    "mov %7, %%"REG_c" \n\t"\
    "mov %6, %2 \n\t"\
    "mov %4, %%"REG_S" \n\t"\
    "pxor %%mm7, %%mm7 \n\t" /* 0 */\
    "pcmpeqd %%mm3, %%mm3 \n\t"\
    "psllw $15, %%mm3 \n\t"\
    "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\
    "1: \n\t"\
    "mov %1, %%"REG_D" \n\t"\
    "mov (%%"REG_D"), %%"REG_D" \n\t"\
    "add %3, %%"REG_D" \n\t"
Definition at line 762 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_mmx(), and inner_add_yblock_bw_8_obmc_16_mmx().
#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset) \
    "mov %0, %%"REG_d" \n\t"\
    "psrlw $4, %%mm1 \n\t"\
    "psrlw $4, %%mm5 \n\t"\
    "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
    "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
    "paddw %%mm3, %%mm1 \n\t"\
    "paddw %%mm3, %%mm5 \n\t"\
    "psraw $4, %%mm1 \n\t"\
    "psraw $4, %%mm5 \n\t"\
    "packuswb %%mm5, %%mm1 \n\t"\
    "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
Definition at line 796 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_mmx(), and inner_add_yblock_bw_8_obmc_16_mmx().
#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset) \
    "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
    "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
    "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
    "punpcklbw %%mm7, %%"out_reg1" \n\t"\
    "punpcklbw %%mm7, %%"out_reg2" \n\t"\
    "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
    "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
    "punpcklbw %%mm7, %%mm0 \n\t"\
    "punpcklbw %%mm7, %%mm4 \n\t"\
    "pmullw %%mm0, %%"out_reg1" \n\t"\
    "pmullw %%mm4, %%"out_reg2" \n\t"
Definition at line 778 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_mmx(), and inner_add_yblock_bw_8_obmc_16_mmx().
#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \
    snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\
    "paddusw %%xmm2, %%xmm1 \n\t"\
    "paddusw %%xmm6, %%xmm5 \n\t"
Definition at line 656 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_sse2().
#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \
    snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\
    "paddusw %%xmm2, %%xmm1 \n\t"\
    "paddusw %%xmm6, %%xmm5 \n\t"
Definition at line 651 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_8_obmc_16_bh_even_sse2().
#define snow_inner_add_yblock_sse2_end_16 \
    "add $"PTR_SIZE"*1, %1 \n\t"\
    snow_inner_add_yblock_sse2_end_common1\
    "dec %2 \n\t"\
    snow_inner_add_yblock_sse2_end_common2
Definition at line 684 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_sse2().
#define snow_inner_add_yblock_sse2_end_8 \
    "sal $1, %%"REG_c" \n\t"\
    "add $"PTR_SIZE"*2, %1 \n\t"\
    snow_inner_add_yblock_sse2_end_common1\
    "sar $1, %%"REG_c" \n\t"\
    "sub $2, %2 \n\t"\
    snow_inner_add_yblock_sse2_end_common2
Definition at line 676 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_8_obmc_16_bh_even_sse2().
#define snow_inner_add_yblock_sse2_end_common1
#define snow_inner_add_yblock_sse2_end_common2
#define snow_inner_add_yblock_sse2_header \
    IDWTELEM * * dst_array = sb->line + src_y;\
    x86_reg tmp;\
    __asm__ volatile(\
    "mov %7, %%"REG_c" \n\t"\
    "mov %6, %2 \n\t"\
    "mov %4, %%"REG_S" \n\t"\
    "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\
    "pcmpeqd %%xmm3, %%xmm3 \n\t"\
    "psllw $15, %%xmm3 \n\t"\
    "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\
    "1: \n\t"\
    "mov %1, %%"REG_D" \n\t"\
    "mov (%%"REG_D"), %%"REG_D" \n\t"\
    "add %3, %%"REG_D" \n\t"
Definition at line 609 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_sse2(), and inner_add_yblock_bw_8_obmc_16_bh_even_sse2().
#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset) \
    "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
    "movq (%%"REG_d"), %%"out_reg1" \n\t"\
    "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
    "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
    "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
    "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
    "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
    "punpcklbw %%xmm7, %%xmm0 \n\t"\
    "punpcklbw %%xmm7, %%xmm4 \n\t"\
    "pmullw %%xmm0, %%"out_reg1" \n\t"\
    "pmullw %%xmm4, %%"out_reg2" \n\t"
Definition at line 638 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_16_obmc_32_sse2().
#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset) \
    "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
    "movq (%%"REG_d"), %%"out_reg1" \n\t"\
    "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
    "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
    "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
    "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
    "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
    "punpcklbw %%xmm7, %%xmm0 \n\t"\
    "punpcklbw %%xmm7, %%xmm4 \n\t"\
    "pmullw %%xmm0, %%"out_reg1" \n\t"\
    "pmullw %%xmm4, %%"out_reg2" \n\t"
Definition at line 625 of file snowdsp_mmx.c.
Referenced by inner_add_yblock_bw_8_obmc_16_bh_even_sse2().
#define snow_vertical_compose_mmx_add(r, t0, t1, t2, t3)   snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
Definition at line 523 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx().
#define snow_vertical_compose_mmx_load(r, t0, t1, t2, t3)   snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
Definition at line 520 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx().
#define snow_vertical_compose_mmx_load_add(op, r, t0, t1, t2, t3)
#define snow_vertical_compose_mmx_move(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_mmx_store(w, s0, s1, s2, s3)
#define snow_vertical_compose_r2r_add(s0, s1, s2, s3, t0, t1, t2, t3) \
    "paddw %%"s0", %%"t0" \n\t"\
    "paddw %%"s1", %%"t1" \n\t"\
    "paddw %%"s2", %%"t2" \n\t"\
    "paddw %%"s3", %%"t3" \n\t"
Definition at line 423 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx(), and ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_r2r_pmulhw(s0, s1, s2, s3, t0, t1, t2, t3) \
    "pmulhw %%"s0", %%"t0" \n\t"\
    "pmulhw %%"s1", %%"t1" \n\t"\
    "pmulhw %%"s2", %%"t2" \n\t"\
    "pmulhw %%"s3", %%"t3" \n\t"
Definition at line 429 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx(), and ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_r2r_sub(s0, s1, s2, s3, t0, t1, t2, t3) \
    "psubw %%"s0", %%"t0" \n\t"\
    "psubw %%"s1", %%"t1" \n\t"\
    "psubw %%"s2", %%"t2" \n\t"\
    "psubw %%"s3", %%"t3" \n\t"
Definition at line 405 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx(), and ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_sra(n, t0, t1, t2, t3) \
    "psraw $"n", %%"t0" \n\t"\
    "psraw $"n", %%"t1" \n\t"\
    "psraw $"n", %%"t2" \n\t"\
    "psraw $"n", %%"t3" \n\t"
Definition at line 417 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_mmx(), and ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_sse2_add(r, t0, t1, t2, t3)   snow_vertical_compose_sse2_load_add("paddw",r,t0,t1,t2,t3)
Definition at line 402 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_sse2_load(r, t0, t1, t2, t3)   snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
Definition at line 399 of file snowdsp_mmx.c.
Referenced by ff_snow_vertical_compose97i_sse2().
#define snow_vertical_compose_sse2_load_add(op, r, t0, t1, t2, t3)
#define snow_vertical_compose_sse2_move(s0, s1, s2, s3, t0, t1, t2, t3)
#define snow_vertical_compose_sse2_store(w, s0, s1, s2, s3)
void ff_dwt_init_x86(DWTContext *c)
Definition at line 876 of file snowdsp_mmx.c.
Referenced by ff_dwt_init().
static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width) [static]
Definition at line 218 of file snowdsp_mmx.c.
static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width) [static]
Definition at line 29 of file snowdsp_mmx.c.
static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 866 of file snowdsp_mmx.c.
static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t **block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 852 of file snowdsp_mmx.c.
static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width) [static]
Definition at line 539 of file snowdsp_mmx.c.
static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width) [static]
Definition at line 441 of file snowdsp_mmx.c.
static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 835 of file snowdsp_mmx.c.
Referenced by ff_snow_inner_add_yblock_mmx().
static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 738 of file snowdsp_mmx.c.
Referenced by ff_snow_inner_add_yblock_sse2().
static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 690 of file snowdsp_mmx.c.
Referenced by ff_snow_inner_add_yblock_sse2().
static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t **block, int b_w, x86_reg b_h, int src_x, int src_y, x86_reg src_stride, slice_buffer *sb, int add, uint8_t *dst8) [static]
Definition at line 824 of file snowdsp_mmx.c.
Referenced by ff_snow_inner_add_yblock_mmx(), and ff_snow_inner_add_yblock_sse2().