0

Reverting 26070 because of media test failures

6 failed media_unittests
- CreateFrame
- Clamp
- YV12
- YV16
- YV12
- YV16

TBR=fbarchard
BUG=none
TEST=none

Review URL: http://codereview.chromium.org/202068

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@26074 0039d316-1c4b-4281-b951-d872f2087c98
This commit is contained in:
mhm@chromium.org
2009-09-12 03:53:20 +00:00
parent eb268a94c2
commit 8e5722d231
4 changed files with 340 additions and 130 deletions

@ -42,7 +42,7 @@ unsigned int hash(unsigned char *s, size_t len, unsigned int hash = 5381) {
// Set to 100 to time ConvertYUVToRGB32.
// This will take approximately 40 to 200 ms.
static const int kTestTimes = 100;
static const int kTestTimes = 1;
TEST(YUVConvertTest, YV12) {
// Allocate all surfaces.

@ -63,8 +63,9 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
} // extern "C"
#if !defined(USE_MMX)
// Windows, Mac and Linux x86 use MMX; x64 and other CPUs do not.
#if defined(OS_WIN) || defined(ARCH_CPU_X86)
#if defined(_MSC_VER)
#define USE_MMX 1
#elif defined(OS_LINUX) && defined(ARCH_CPU_X86)
#define USE_MMX 1
#else
#define USE_MMX 0

@ -300,9 +300,9 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"jmp Lconvertend\n"
"jmp convertend\n"
"Lconvertloop:"
"convertloop:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
@ -321,12 +321,12 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"Lconvertend:"
"convertend:"
"sub $0x2,%ecx\n"
"jns Lconvertloop\n"
"jns convertloop\n"
"and $0x1,%ecx\n"
"je Lconvertdone\n"
"je convertdone\n"
"movzbl (%edi),%eax\n"
"movq kCoefficientsRgbU(,%eax,8),%mm0\n"
@ -338,7 +338,7 @@ void FastConvertYUVToRGB32Row(const uint8* y_buf,
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lconvertdone:"
"convertdone:"
"popa\n"
"ret\n"
);
@ -361,9 +361,9 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp Lscaleend\n"
"jmp scaleend\n"
"Lscaleloop:"
"scaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
@ -389,12 +389,12 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"Lscaleend:"
"scaleend:"
"sub $0x2,%ecx\n"
"jns Lscaleloop\n"
"jns scaleloop\n"
"and $0x1,%ecx\n"
"je Lscaledone\n"
"je scaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
@ -419,7 +419,7 @@ void ScaleYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lscaledone:"
"scaledone:"
"popa\n"
"ret\n"
);

@ -4,9 +4,17 @@
#include "media/base/yuv_row.h"
#ifdef _DEBUG
#include "base/logging.h"
#else
#define DCHECK(a)
#endif
// TODO(fbarchard): Make MMX work in DLLs. Currently only works in unittests.
// TODO(fbarchard): Do 64 bit version.
extern "C" {
#if USE_MMX
#define RGBY(i) { \
static_cast<int16>(1.164 * 64 * (i - 16) + 0.5), \
@ -29,8 +37,7 @@ extern "C" {
0 \
}
#define MMX_ALIGNED(var) \
var __attribute__ ((section ("__TEXT,__text"))) __attribute__ ((aligned(16)))
#define MMX_ALIGNED(var) var __attribute__((aligned(16)))
MMX_ALIGNED(int16 kCoefficientsRgbY[256][4]) = {
RGBY(0x00), RGBY(0x01), RGBY(0x02), RGBY(0x03),
@ -238,36 +245,75 @@ MMX_ALIGNED(int16 kCoefficientsRgbV[256][4]) = {
#undef RGBV
#undef MMX_ALIGNED
extern void MacConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int16 *kCoefficientsRgbY);
// TODO(fbarchard): Use the following function instead of
// pure assembly to help make code more portable to 64 bit
// and Mac, which has different labels.
// no-gcse eliminates the frame pointer, freeing up ebp.
// NOTE(review): GCC documents -fomit-frame-pointer as the option that frees
// the frame pointer register; confirm that "no-gcse" has the effect claimed
// above on the compilers this is built with.
//
// Extended-asm variant of the row converter.  Only compiled when
// FUTURE_64BIT_VERSION is defined.
// Each loop iteration reads one U byte, one V byte and two Y bytes, looks
// them up in kCoefficientsRgbU/V/Y, and writes two 32-bit pixels (8 bytes)
// to rgb_buf.  The width operand is halved up front, so an odd trailing
// pixel is not converted by this routine.
// NOTE(review): the iteration count is only tested at the bottom of the
// loop, so a width below 2 gives a count of 0 that is decremented past zero
// before the test; confirm callers never pass such a width.
// NOTE(review): operands %0-%4 are declared as plain inputs ("r") but the
// asm modifies them (add/shr/sub).  The asm also stores through %3 without a
// "memory" clobber and uses mm0-mm2 without listing them.  Check these
// against the GCC extended-asm rules before enabling this code.
#if defined(FUTURE_64BIT_VERSION)
void __attribute__((optimize("O2", "no-gcse")))
NewFastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
asm(
// width /= 2: the loop handles two pixels per pass.
"shr %4\n"
"1:\n"
// Fetch one U and one V sample and advance both chroma pointers.
"movzb (%1),%%eax\n"
"add $0x1,%1\n"
"movzb (%2),%%ebx\n"
"add $0x1,%2\n"
// mm0 = U contribution + V contribution, shared by both pixels.
"movq kCoefficientsRgbU(,%%eax,8),%%mm0\n"
"movzb (%0),%%eax\n"
"paddsw kCoefficientsRgbV(,%%ebx,8),%%mm0\n"
"movzb 0x1(%0),%%ebx\n"
// mm1 / mm2 = Y contribution of the first / second pixel.
"movq kCoefficientsRgbY(,%%eax,8),%%mm1\n"
"add $0x2,%0\n"
"movq kCoefficientsRgbY(,%%ebx,8),%%mm2\n"
// Add chroma, shift out the 6 fraction bits, saturate to unsigned bytes.
"paddsw %%mm0,%%mm1\n"
"paddsw %%mm0,%%mm2\n"
"psraw $0x6,%%mm1\n"
"psraw $0x6,%%mm2\n"
"packuswb %%mm2,%%mm1\n"
// Store both pixels and advance the destination by 8 bytes.
"movntq %%mm1,0x0(%3)\n"
"add $0x8,%3\n"
"sub $0x1,%4\n"
"jne 1b\n"
: : "r"(y_buf),"r"(u_buf),"r"(v_buf),"r"(rgb_buf),"r"(width)
: "eax","ebx");
}
#endif
extern void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width);
__asm__(
" .globl _MacConvertYUVToRGB32Row\n"
"_MacConvertYUVToRGB32Row:\n"
" .globl _FastConvertYUVToRGB32Row\n"
"_FastConvertYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x38(%esp),%ecx\n"
"mov 0x34(%esp),%ecx\n"
"jmp convertend\n"
"jmp Lconvertend\n"
"Lconvertloop:"
"convertloop:"
"movzbl (%edi),%eax\n"
"add $0x1,%edi\n"
"movzbl (%esi),%ebx\n"
"add $0x1,%esi\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movq _kCoefficientsRgbU(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"paddsw 4096(%ecx,%ebx,8),%mm0\n"
"paddsw _kCoefficientsRgbV(,%ebx,8),%mm0\n"
"movzbl 0x1(%edx),%ebx\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
"add $0x2,%edx\n"
"movq 0(%ecx,%ebx,8),%mm2\n"
"movq _kCoefficientsRgbY(,%ebx,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
@ -275,129 +321,292 @@ extern void MacConvertYUVToRGB32Row(const uint8* y_buf,
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"Lconvertend:"
"sub $0x2,0x34(%esp)\n"
"jns Lconvertloop\n"
"convertend:"
"sub $0x2,%ecx\n"
"jns convertloop\n"
"and $0x1,0x34(%esp)\n"
"je Lconvertdone\n"
"and $0x1,%ecx\n"
"je convertdone\n"
"movzbl (%edi),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"movq _kCoefficientsRgbU(,%eax,8),%mm0\n"
"movzbl (%esi),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"paddsw _kCoefficientsRgbV(,%eax,8),%mm0\n"
"movzbl (%edx),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"movq _kCoefficientsRgbY(,%eax,8),%mm1\n"
"paddsw %mm0,%mm1\n"
"psraw $0x6,%mm1\n"
"packuswb %mm1,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lconvertdone:\n"
"convertdone:"
"popa\n"
"ret\n"
);
// MMX implementation of ScaleYUVToRGB32Row.  The declaration gives the
// C-visible prototype; the file-scope asm block after it defines the symbol.
extern void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx);
// Register use after the prologue.  pusha pushes 32 bytes, so 0x24(%esp) is
// the first stack argument; this assumes arguments are passed on the stack
// in declaration order -- TODO confirm the calling convention in use.
//   edx = y_buf   edi = u_buf   esi = v_buf   ebp = rgb_buf   ecx = width
//   0x38(%esp) = scaled_dx      ebx = running source position, starts at 0
// NOTE(review): the exported label has a leading underscore while the
// kCoefficientsRgb* tables are referenced without one; confirm which symbol
// decoration the target toolchain expects.
// NOTE(review): no emms is executed before ret; confirm callers clear the
// MMX state before any x87 floating-point use.
__asm__(
" .globl _ScaleYUVToRGB32Row\n"
"_ScaleYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x34(%esp),%ecx\n"
"xor %ebx,%ebx\n"
// Enter at the loop test so that a width below 2 skips the two-pixel loop.
"jmp scaleend\n"
// Two output pixels per iteration.
"scaleloop:"
// U and V are sampled at (position >> 5); their sum in mm0 is shared by
// both pixels of this iteration.
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbU(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbV(,%eax,8),%mm0\n"
// First Y sample at (position >> 4); position then advances by scaled_dx.
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
// Second Y sample, same pattern.
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
// Add chroma to each luma term, shift out 6 bits, saturate to bytes.
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
// Store 8 bytes (two pixels) and advance the destination.
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
// Loop while at least two pixels remain.
"scaleend:"
"sub $0x2,%ecx\n"
"jns scaleloop\n"
// ecx is now -2 or -1; its low bit tells whether one pixel is left over.
"and $0x1,%ecx\n"
"je scaledone\n"
// Odd width: convert the final pixel.  The position is not advanced in this
// tail, so mm1 and mm2 are loaded from the same Y sample.
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq kCoefficientsRgbU(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw kCoefficientsRgbV(,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq kCoefficientsRgbY(,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
// Only the low 4 bytes (one pixel) are written.
"movd %mm1,0x0(%ebp)\n"
"scaledone:"
"popa\n"
"ret\n"
);
#else // USE_MMX
// Reference version of YUV converter.
// Saturation lookup table for the reference (non-MMX) converter.
// Layout: kClipOverflow entries of 0x00, then the kClipTableSize identity
// entries 0x00..0xFF, then kClipOverflow entries of 0xFF.  Indexing the table
// at (v + kClipOverflow) therefore yields v clamped to 0..255 for any v in
// -kClipOverflow .. kClipTableSize + kClipOverflow - 1.
static const int kClipTableSize = 256;
static const int kClipOverflow = 288;  // Cb max is 535.

// Declared const because clip() only reads from it; this also prevents
// accidental writes to the table.
static const uint8 kRgbClipTable[kClipOverflow +
                                 kClipTableSize +
                                 kClipOverflow] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // 288 underflow values
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,  // clipped to 0.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  // Unclipped values.
  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
  0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
  0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
  0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
  0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
  0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
  0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
  0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
  0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
  0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7,
  0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7,
  0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7,
  0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
  0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7,
  0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // 288 overflow values
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,  // clipped to 255.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
// Clamps one colour channel to the 0..255 range using kRgbClipTable.
// |value| is signed fixed point with 8 fraction bits.  The fraction is
// discarded with >> 8 and the result is biased by kClipOverflow to form the
// table index.  The table carries kClipOverflow (288) guard entries on each
// side of its 256 pass-through entries, so the shifted value must lie in
// -288 .. 543; the DCHECKs verify that in debug builds.
// Returns the clamped channel widened to uint32.
static inline uint32 clip(int32 value) {
  const int32 index = (value >> 8) + kClipOverflow;
  DCHECK(index >= 0);
  DCHECK(index < (kClipOverflow + kClipTableSize + kClipOverflow));
  return static_cast<uint32>(kRgbClipTable[index]);
}
// Converts a single Y/U/V sample triple into one 32-bit pixel at |rgb_buf|.
// U and V are re-centred by subtracting 128 and Y has 16 subtracted before
// the integer coefficients are applied.  All sums are in the 8-fraction-bit
// fixed point format that clip() consumes.
// The packed word holds the U-derived channel in bits 0-7, the combined U/V
// channel in bits 8-15, the V-derived channel in bits 16-23 and 0xff in
// bits 24-31.  It is written with a single uint32 store through |rgb_buf|.
static inline void YuvPixel(uint8 y,
                            uint8 u,
                            uint8 v,
                            uint8* rgb_buf) {
  const int32 u_offset = static_cast<int32>(u) - 128;
  const int32 v_offset = static_cast<int32>(v) - 128;

  // Luma term shared by all three channels; it carries a +128 bias
  // (presumably rounding for the >> 8 performed in clip()).
  const int32 luma = (static_cast<int32>(y) - 16) * 298 + 128;

  // Per-channel chroma terms; each one carries its own +128 as well.
  const int32 chroma_low = 516 * u_offset + 128;
  const int32 chroma_mid = -100 * u_offset - 208 * v_offset + 128;
  const int32 chroma_high = 409 * v_offset + 128;

  uint32 pixel = 0xff000000;
  pixel |= clip(luma + chroma_low);
  pixel |= clip(luma + chroma_mid) << 8;
  pixel |= clip(luma + chroma_high) << 16;
  *reinterpret_cast<uint32*>(rgb_buf) = pixel;
}
void FastConvertYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width) {
MacConvertYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width,
&kCoefficientsRgbY[0][0]);
for (int x = 0; x < width; x += 2) {
uint8 u = u_buf[x >> 1];
uint8 v = v_buf[x >> 1];
uint8 y0 = y_buf[x];
YuvPixel(y0, u, v, rgb_buf);
if ((x + 1) < width) {
uint8 y1 = y_buf[x + 1];
YuvPixel(y1, u, v, rgb_buf + 4);
}
rgb_buf += 8; // Advance 2 pixels.
}
}
extern void MacScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx,
int16 *kCoefficientsRgbY);
__asm__(
" .globl _MacScaleYUVToRGB32Row\n"
"_MacScaleYUVToRGB32Row:\n"
"pusha\n"
"mov 0x24(%esp),%edx\n"
"mov 0x28(%esp),%edi\n"
"mov 0x2c(%esp),%esi\n"
"mov 0x30(%esp),%ebp\n"
"mov 0x3c(%esp),%ecx\n"
"xor %ebx,%ebx\n"
"jmp Lscaleend\n"
"Lscaleloop:"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"add 0x38(%esp),%ebx\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movntq %mm1,0x0(%ebp)\n"
"add $0x8,%ebp\n"
"Lscaleend:"
"sub $0x2,0x34(%esp)\n"
"jns Lscaleloop\n"
"and $0x1,0x34(%esp)\n"
"je Lscaledone\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%edi,%eax,1),%eax\n"
"movq 2048(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x5,%eax\n"
"movzbl (%esi,%eax,1),%eax\n"
"paddsw 4096(%ecx,%eax,8),%mm0\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm1\n"
"mov %ebx,%eax\n"
"sar $0x4,%eax\n"
"movzbl (%edx,%eax,1),%eax\n"
"movq 0(%ecx,%eax,8),%mm2\n"
"paddsw %mm0,%mm1\n"
"paddsw %mm0,%mm2\n"
"psraw $0x6,%mm1\n"
"psraw $0x6,%mm2\n"
"packuswb %mm2,%mm1\n"
"movd %mm1,0x0(%ebp)\n"
"Lscaledone:"
"popa\n"
"ret\n"
);
// 28.4 fixed point is used. A shift by 4 isolates the integer.
// A shift by 5 is used to further subsample the chrominance channels.
// & 15 isolates the fixed point fraction. >> 2 to get the upper 2 bits,
// for 1/4 pixel accurate interpolation.
void ScaleYUVToRGB32Row(const uint8* y_buf,
const uint8* u_buf,
const uint8* v_buf,
uint8* rgb_buf,
int width,
int scaled_dx) {
MacScaleYUVToRGB32Row(y_buf, u_buf, v_buf, rgb_buf, width, scaled_dx,
&kCoefficientsRgbY[0][0]);
int scaled_x = 0;
for (int x = 0; x < width; ++x) {
uint8 u = u_buf[scaled_x >> 5];
uint8 v = v_buf[scaled_x >> 5];
uint8 y0 = y_buf[scaled_x >> 4];
YuvPixel(y0, u, v, rgb_buf);
rgb_buf += 4;
scaled_x += scaled_dx;
}
}
#endif // USE_MMX
} // extern "C"