23 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
25 #include "SDL_stdinc.h"
/*
 * 64-bit constants that the MMX routines in this file pass to their inline
 * assembly as "m" (memory) operands.
 * mmx_t is declared elsewhere; .ud is initialised with two values and .uw
 * with four, so these are presumably the 2x32-bit and 4x16-bit views of one
 * 64-bit quantity -- TODO confirm against the mmx_t definition.
 */
/* 0x0080, 0x00ff and 0xff00 repeated in every 16-bit word. */
31 static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
32 static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
33 static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
/* 0x004a (74) in every word; used as a pmullw multiplier by
 * Color565DitherYV12MMX1X. Presumably the luma scale factor -- the
 * fixed-point scale is not stated here, verify before changing. */
35 static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
/* Per-word multipliers named for the U/V contribution to blue, red and green
 * in the 32-bit RGB path. As signed 16-bit values 0xffea is -22 and 0xffd2
 * is -46; 0x0072 is 114 and 0x0059 is 89. */
37 static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
38 static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
39 static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
40 static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
/* Counterparts used by Color565DitherYV12MMX1X. As signed 16-bit values
 * 0xffe8 is -24 and 0xffcd is -51; 0x0081 is 129 and 0x0066 is 102. */
42 static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
43 static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
44 static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
45 static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
/* Bit masks replicated in every word: 0xf800 selects the top five bits and
 * 0x07e0 the following six bits (presumably the red and green fields of an
 * RGB565 pixel, as the names suggest). */
47 static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
48 static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
/*
 * ColorRGBDitherYV12MMX1X
 *
 * MMX inline-assembly routine operating on the lum/cr/cb planes and an output
 * buffer that is addressed as Uint32 pixels (see the row2 cast below).
 * Judging by the name it converts YV12 to 32-bit RGB at 1x scale -- the
 * stores themselves are not among the statements shown here, TODO confirm.
 *
 *   colortab, rgb_2_pix  not referenced by any statement visible here
 *   lum, cr, cb          source planes, handed to the asm as input operands
 *   out                  output buffer; row2 is derived from it
 *   rows, cols           picture dimensions; cols is also an asm operand
 *   mod                  row padding; rewritten below before entering the asm
 */
79 void ColorRGBDitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
80 unsigned char *lum,
unsigned char *cr,
81 unsigned char *cb,
unsigned char *out,
82 int rows,
int cols,
int mod )
/* y points cols*rows bytes past lum; it is passed to the asm as a memory
 * operand (presumably the end-of-luma marker that stops the loop). */
87 unsigned char*
y = lum +cols*rows;
/* row2 starts cols+mod 32-bit pixels after out. */
90 row2 = (
Uint32 *)out+cols+mod;
/* mod becomes (cols + 2*mod) * 4; the factor 4 matches the Uint32 pixel size,
 * so this is presumably a byte stride that skips one output row. */
91 mod = (mod+cols+mod)*4;
93 __asm__ __volatile__ (
/* Store EBX in the stack slot at 4(%esp) and reload it from there. */
99 "movl %%ebx,4(%%esp)\n"
107 "movl 4(%%esp),%%ebx\n"
/* Load 32 bits from the address held in EBX into mm1. */
108 "movd (%%ebx),%%mm1\n"
/* Interleave the low bytes of mm1 with mm7 (widens bytes to words if mm7 is
 * zero at this point -- not visible here), then copy the low dword of mm1
 * into its high dword. */
112 "punpcklbw %%mm7,%%mm1\n"
113 "punpckldq %%mm1,%%mm1\n"
/* Load 8 bytes from %2 + %4 (lum + cols per the operand list below, assuming
 * no output operands precede it). */
125 "movq (%2,%4),%%mm3\n"
126 "punpckldq %%mm3,%%mm2\n"
/* Signed saturating word adds of mm1 into mm5 and mm6, then pack each to
 * unsigned saturated bytes and interleave the two byte streams. */
135 "paddsw %%mm1, %%mm5\n"
136 "paddsw %%mm1, %%mm6\n"
137 "packuswb %%mm5,%%mm5\n"
138 "packuswb %%mm6,%%mm6\n"
140 "punpcklbw %%mm5,%%mm6\n"
144 "punpcklbw %%mm7,%%mm1\n"
145 "punpckldq %%mm1,%%mm1\n"
/* Same add / pack / interleave pattern on mm3 and mm7, adding mm5 and mm0. */
158 "paddsw %%mm5, %%mm3\n"
159 "paddsw %%mm5, %%mm7\n"
160 "paddsw %%mm0, %%mm3\n"
161 "paddsw %%mm0, %%mm7\n"
162 "packuswb %%mm3,%%mm3\n"
163 "packuswb %%mm7,%%mm7\n"
164 "punpcklbw %%mm3,%%mm7\n"
/* Same pattern again on mm3 and mm5, adding mm1. */
169 "paddsw %%mm1, %%mm3\n"
170 "paddsw %%mm1, %%mm5\n"
171 "packuswb %%mm3,%%mm3\n"
172 "packuswb %%mm5,%%mm5\n"
173 "punpcklbw %%mm3,%%mm5\n"
/* Byte and word interleaves that merge the per-channel registers
 * (presumably assembling the 32-bit pixel layout -- TODO confirm channel
 * order against the full instruction sequence). */
183 "punpcklbw %%mm4,%%mm1\n"
184 "punpcklbw %%mm4,%%mm3\n"
187 "punpcklwd %%mm1,%%mm3\n"
188 "punpckhwd %%mm2,%%mm0\n"
192 "punpcklbw %%mm1,%%mm2\n"
193 "punpcklwd %%mm4,%%mm2\n"
198 "punpcklbw %%mm1,%%mm4\n"
199 "punpckhwd %%mm2,%%mm4\n"
206 "punpckhbw %%mm2,%%mm6\n"
207 "punpckhbw %%mm1,%%mm5\n"
209 "punpcklwd %%mm6,%%mm1\n"
211 "punpckhwd %%mm6,%%mm5\n"
/* Operands, in order: cr, cb, lum, row1, cols, row2, x, y, mod, then the
 * constant tables. If no output operands precede them these are %0..%14.
 * cr, x, y, mod and the tables are memory operands; cb, lum, row1, cols and
 * row2 are register operands. */
234 :
"m" (cr),
"r"(cb),
"r"(lum),
235 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
236 "m"(MMX_0080w),
"m"(MMX_VgrnRGB),
"m"(MMX_VredRGB),
237 "m"(MMX_FF00w),
"m"(MMX_00FFw),
"m"(MMX_UgrnRGB),
/*
 * Color565DitherYV12MMX1X
 *
 * MMX inline-assembly routine that reads the lum/cr/cb planes and stores
 * 64-bit results through row1 and row2, where row2 is derived from out
 * addressed as Uint16 pixels. Judging by the name and the 565 constants it
 * produces RGB565 output at 1x scale -- TODO confirm.
 *
 *   colortab, rgb_2_pix  not referenced by any statement visible here
 *   lum, cr, cb          source planes, handed to the asm as input operands
 *   out                  output buffer; row2 is derived from it
 *   rows, cols           picture dimensions; cols is also an asm operand
 *   mod                  row padding; rewritten below before entering the asm
 *
 * Operand numbers quoted in the comments below (%2, %10, ...) assume that no
 * output operands precede the operand list at the end of the statement.
 */
242 void Color565DitherYV12MMX1X(
int *colortab,
Uint32 *rgb_2_pix,
243 unsigned char *lum,
unsigned char *cr,
244 unsigned char *cb,
unsigned char *out,
245 int rows,
int cols,
int mod )
/* y points cols*rows bytes past lum; it is passed to the asm as a memory
 * operand (presumably the end-of-luma marker that stops the loop). */
250 unsigned char* y = lum +cols*rows;
/* row2 starts cols+mod 16-bit pixels after out. */
253 row2 = (
Uint16 *)out+cols+mod;
/* mod becomes (cols + 2*mod) * 2; the factor 2 matches the Uint16 pixel size,
 * so this is presumably a byte stride that skips one output row. */
254 mod = (mod+cols+mod)*2;
256 __asm__ __volatile__(
/* Store EBX in the stack slot at 4(%esp). */
262 "movl %%ebx, 4(%%esp)\n"
/* Clear mm7 so it can serve as the zero source for byte->word unpacking. */
269 "pxor %%mm7, %%mm7\n"
/* Reload EBX and fetch 32 bits from the address it holds into mm1. */
271 "movl 4(%%esp), %%ebx\n"
272 "movd (%%ebx), %%mm1\n"
/* Interleave the low bytes of mm0 and mm1 with mm7 (zero-extension to words,
 * provided mm7 is still zero here). */
275 "punpcklbw %%mm7, %%mm0\n"
276 "punpcklbw %%mm7, %%mm1\n"
/* Keep copies of mm0/mm1 in mm2/mm3, then multiply (low 16 bits of each
 * product) by the chroma tables: mm2 *= %10 (MMX_Ugrn565), mm0 *= %11
 * (MMX_Ublu5x5), mm3 *= %13 (MMX_Vgrn565), mm1 *= %14 (MMX_Vred5x5). */
279 "movq %%mm0, %%mm2\n"
280 "movq %%mm1, %%mm3\n"
281 "pmullw %10, %%mm2\n"
283 "pmullw %11, %%mm0\n"
285 "pmullw %13, %%mm3\n"
287 "pmullw %14, %%mm1\n"
/* Multiply mm6 and mm7 by %15 (MMX_Ycoeff); in between, mm2 += mm3 sums the
 * two products taken with the *grn* tables. */
289 "pmullw %15, %6\n"
290 "paddw %%mm3, %%mm2\n"
291 "pmullw %15, %%mm7\n"
/* From the scaled mm6: mm6 += mm0, mm4 = copy + mm1, mm5 = copy + mm2
 * (by the table names: blue, red and green terms respectively). */
293 "movq %%mm6, %%mm4\n"
294 "paddw %%mm0, %%mm6\n"
295 "movq %%mm4, %%mm5\n"
296 "paddw %%mm1, %%mm4\n"
297 "paddw %%mm2, %%mm5\n"
299 "movq %%mm7, %%mm3\n"
301 "paddw %%mm0, %%mm7\n"
/* Pack words to unsigned saturated bytes, then duplicate each byte into a
 * word by interleaving the register with itself. */
303 "packuswb %%mm4, %%mm4\n"
304 "packuswb %%mm5, %%mm5\n"
305 "packuswb %%mm6, %%mm6\n"
306 "punpcklbw %%mm4, %%mm4\n"
307 "punpcklbw %%mm5, %%mm5\n"
311 "punpcklbw %%mm6, %%mm6\n"
/* Same three sums for the values saved from mm7 (copy held in mm3). */
316 "movq %%mm3, %%mm5\n"
317 "paddw %%mm1, %%mm3\n"
318 "paddw %%mm2, %%mm5\n"
/* Load 8 bytes from %2 + %4 (lum + cols per the operand list). */
322 "movq (%2, %4), %%mm6\n"
324 "packuswb %%mm3, %%mm3\n"
325 "packuswb %%mm5, %%mm5\n"
326 "packuswb %%mm7, %%mm7\n"
328 "punpcklbw %%mm3, %%mm3\n"
329 "punpcklbw %%mm5, %%mm5\n"
330 "pmullw %15, %%mm6\n"
331 "punpcklbw %%mm7, %%mm7\n"
/* Second load from lum + cols, this time into mm7. */
338 "movq (%2,%4), %%mm7\n"
/* Interleave the words of mm4 and mm3 (low half into mm4, high half into
 * mm5) and store the high half at byte offset 8 from %3 (row1). */
341 "movq %%mm4, %%mm5\n"
342 "punpcklwd %%mm3, %%mm4\n"
343 "pmullw %15, %%mm7\n"
344 "punpckhwd %%mm3, %%mm5\n"
347 "movq %%mm5, 8(%3)\n"
/* Repeat the add / pack / duplicate sequence for the data loaded from
 * lum + cols. */
349 "movq %%mm6, %%mm4\n"
350 "paddw %%mm0, %%mm6\n"
352 "movq %%mm4, %%mm5\n"
353 "paddw %%mm1, %%mm4\n"
354 "paddw %%mm2, %%mm5\n"
356 "movq %%mm7, %%mm3\n"
358 "paddw %%mm0, %%mm7\n"
/* mm0 is overwritten with the copy of mm7 taken into mm3 above; from here on
 * it accumulates that copy plus mm2. */
360 "movq %%mm3, %%mm0\n"
361 "packuswb %%mm4, %%mm4\n"
362 "paddw %%mm1, %%mm3\n"
363 "packuswb %%mm5, %%mm5\n"
364 "paddw %%mm2, %%mm0\n"
365 "packuswb %%mm6, %%mm6\n"
366 "punpcklbw %%mm4, %%mm4\n"
367 "punpcklbw %%mm5, %%mm5\n"
368 "punpcklbw %%mm6, %%mm6\n"
379 "packuswb %%mm3, %%mm3\n"
380 "packuswb %%mm0, %%mm0\n"
381 "packuswb %%mm7, %%mm7\n"
382 "punpcklbw %%mm3, %%mm3\n"
383 "punpcklbw %%mm0, %%mm0\n"
384 "punpcklbw %%mm7, %%mm7\n"
/* Interleave as before and store the high half at byte offset 8 from %5
 * (row2). */
393 "movq %%mm4, %%mm5\n"
395 "punpcklwd %%mm3, %%mm4\n"
396 "punpckhwd %%mm3, %%mm5\n"
399 "movq %%mm5, 8(%5)\n"
/* Operands, in order (%0..%17 if no outputs precede them): cr, cb, lum, row1,
 * cols, row2, x, y, mod, MMX_0080w, MMX_Ugrn565, MMX_Ublu5x5, MMX_00FFw,
 * MMX_Vgrn565, MMX_Vred5x5, MMX_Ycoeff, MMX_red565, MMX_grn565.
 * cr, x, y, mod and the tables are memory operands; cb, lum, row1, cols and
 * row2 are register operands. */
419 :
"m" (cr),
"r"(cb),
"r"(lum),
420 "r"(row1),
"r"(cols),
"r"(row2),
"m"(x),
"m"(y),
"m"(mod),
421 "m"(MMX_0080w),
"m"(MMX_Ugrn565),
"m"(MMX_Ublu5x5),
422 "m"(MMX_00FFw),
"m"(MMX_Vgrn565),
"m"(MMX_Vred5x5),
423 "m"(MMX_Ycoeff),
"m"(MMX_red565),
"m"(MMX_grn565)
uint32_t Uint32
An unsigned 32-bit integer type.
EGLSurface EGLint EGLint y
uint16_t Uint16
An unsigned 16-bit integer type.