45 const unsigned A = info->
a;
60 if ( palmap ==
NULL ) {
61 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
63 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
90 unsigned sR, sG, sB, sA;
106 if ( palmap ==
NULL ) {
107 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
109 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
139 const unsigned A = info->
a;
146 if ( Pixel != ckey ) {
155 if ( palmap ==
NULL ) {
156 *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
158 *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
185 __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
187 hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe);
188 lmask = _mm_set_pi32(0x00010101, 0x00010101);
189 dsta = _mm_set_pi32(dalpha, dalpha);
196 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197 + (s & d & 0x00010101)) | dalpha;
201 for (n >>= 1; n > 0; --
n) {
202 dst1 = *(__m64 *) dstp;
205 src1 = *(__m64 *) srcp;
208 dst2 = _mm_and_si64(dst2, hmask);
209 src2 = _mm_and_si64(src2, hmask);
210 src2 = _mm_add_pi32(src2, dst2);
211 src2 = _mm_srli_pi32(src2, 1);
213 dst1 = _mm_and_si64(dst1, src1);
214 dst1 = _mm_and_si64(dst1, lmask);
215 dst1 = _mm_add_pi32(dst1, src2);
216 dst1 = _mm_or_si64(dst1, dsta);
218 *(__m64 *) dstp = dst1;
237 if (alpha == 128 && (df->
Rmask | df->
Gmask | df->
Bmask) == 0x00FFFFFF) {
239 BlitRGBtoRGBSurfaceAlpha128MMX(info);
241 int width = info->
dst_w;
242 int height = info->
dst_h;
250 __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
252 mm_zero = _mm_setzero_si64();
254 amult = alpha | (alpha << 8);
255 amult = amult | (amult << 16);
257 (0xff << df->
Rshift) | (0xff << df->
258 Gshift) | (0xff << df->
Bshift);
259 mm_alpha = _mm_set_pi32(0, amult & chanmask);
260 mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero);
262 dsta = _mm_set_pi32(dalpha, dalpha);
268 src2 = _mm_cvtsi32_si64(*srcp);
269 src2 = _mm_unpacklo_pi8(src2, mm_zero);
271 dst1 = _mm_cvtsi32_si64(*dstp);
272 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
274 src2 = _mm_sub_pi16(src2, dst1);
275 src2 = _mm_mullo_pi16(src2, mm_alpha);
276 src2 = _mm_srli_pi16(src2, 8);
277 dst1 = _mm_add_pi8(src2, dst1);
279 dst1 = _mm_packs_pu16(dst1, mm_zero);
280 dst1 = _mm_or_si64(dst1, dsta);
281 *dstp = _mm_cvtsi64_si32(dst1);
289 for (n >>= 1; n > 0; --
n) {
291 src1 = *(__m64 *) srcp;
293 src1 = _mm_unpacklo_pi8(src1, mm_zero);
294 src2 = _mm_unpackhi_pi8(src2, mm_zero);
296 dst1 = *(__m64 *) dstp;
298 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
299 dst2 = _mm_unpackhi_pi8(dst2, mm_zero);
301 src1 = _mm_sub_pi16(src1, dst1);
302 src1 = _mm_mullo_pi16(src1, mm_alpha);
303 src1 = _mm_srli_pi16(src1, 8);
304 dst1 = _mm_add_pi8(src1, dst1);
306 src2 = _mm_sub_pi16(src2, dst2);
307 src2 = _mm_mullo_pi16(src2, mm_alpha);
308 src2 = _mm_srli_pi16(src2, 8);
309 dst2 = _mm_add_pi8(src2, dst2);
311 dst1 = _mm_packs_pu16(dst1, dst2);
312 dst1 = _mm_or_si64(dst1, dsta);
314 *(__m64 *) dstp = dst1;
330 int width = info->
dst_w;
331 int height = info->
dst_h;
340 Uint64 multmask, multmask2;
342 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
344 mm_zero = _mm_setzero_si64();
346 multmask <<= (ashift * 2);
347 multmask2 = 0x00FF00FF00FF00FF;
352 Uint32 alpha = *srcp & amask;
355 }
else if (alpha == amask) {
358 src1 = _mm_cvtsi32_si64(*srcp);
359 src1 = _mm_unpacklo_pi8(src1, mm_zero);
361 dst1 = _mm_cvtsi32_si64(*dstp);
362 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
364 mm_alpha = _mm_cvtsi32_si64(alpha);
365 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
366 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
367 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
368 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
369 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
372 src1 = _mm_mullo_pi16(src1, mm_alpha);
373 src1 = _mm_srli_pi16(src1, 8);
374 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
375 dst1 = _mm_srli_pi16(dst1, 8);
376 dst1 = _mm_add_pi16(src1, dst1);
377 dst1 = _mm_packs_pu16(dst1, mm_zero);
379 *dstp = _mm_cvtsi64_si32(dst1);
397 int width = info->
dst_w;
398 int height = info->
dst_h;
409 *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
410 + (s & d & 0x00010101)) | 0xff000000;
422 unsigned alpha = info->
a;
426 int width = info->
dst_w;
427 int height = info->
dst_h;
444 d1 = (d1 + ((s1 - d1) * alpha >> 8))
448 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
449 *dstp = d1 | d | 0xff000000;
464 int width = info->
dst_w;
465 int height = info->
dst_h;
496 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
499 d = (d + ((s -
d) * alpha >> 8)) & 0xff00;
500 dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
501 *dstp = d1 | d | (dalpha << 24);
518 int width = info->
dst_w;
519 int height = info->
dst_h;
528 Uint64 multmask, multmask2;
530 __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
532 mm_zero = _mm_setzero_si64();
534 multmask <<= (ashift * 2);
535 multmask2 = 0x00FF00FF00FF00FF;
542 _m_prefetch(srcp + 16);
543 _m_prefetch(dstp + 16);
545 alpha = *srcp & amask;
548 }
else if (alpha == amask) {
551 src1 = _mm_cvtsi32_si64(*srcp);
552 src1 = _mm_unpacklo_pi8(src1, mm_zero);
554 dst1 = _mm_cvtsi32_si64(*dstp);
555 dst1 = _mm_unpacklo_pi8(dst1, mm_zero);
557 mm_alpha = _mm_cvtsi32_si64(alpha);
558 mm_alpha = _mm_srli_si64(mm_alpha, ashift);
559 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
560 mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
561 mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask);
562 mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2);
566 src1 = _mm_mullo_pi16(src1, mm_alpha);
567 src1 = _mm_srli_pi16(src1, 8);
568 dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
569 dst1 = _mm_srli_pi16(dst1, 8);
570 dst1 = _mm_add_pi16(src1, dst1);
571 dst1 = _mm_packs_pu16(dst1, mm_zero);
573 *dstp = _mm_cvtsi64_si32(dst1);
/*
 * Blend a single 16-bit pixel at 50%: per-field floor((d + s) / 2).
 *
 * d, s  - the two packed 16-bit pixels to average.
 * mask  - selects every bit of the pixel except the lowest bit of each
 *         colour field, so the fields can be summed and halved together
 *         without the halved low bit leaking into the neighbouring field.
 *
 * The bits dropped by the mask are re-added via (s & d & ~mask): the low
 * bit of a field contributes to the average only when it is set in both
 * inputs.
 *
 * Every argument is parenthesised so that expression arguments
 * (e.g. `a | b`) bind as intended; arguments are evaluated more than once,
 * so they must be free of side effects.
 */
#define BLEND16_50(d, s, mask)                                  \
    (((((s) & (mask)) + ((d) & (mask))) >> 1) +                 \
     ((s) & (d) & (~(mask) & 0xffff)))
/*
 * Blend two 16-bit pixels, packed in one 32-bit word, at 50%.
 *
 * d, s  - 32-bit words each holding two packed 16-bit pixels.
 * mask  - 16-bit mask selecting every bit except the lowest bit of each
 *         colour field; it is replicated into both halves of the word.
 *
 * Each operand is halved before the addition so the sum cannot overflow
 * 32 bits; the bits dropped by the mask are re-added via (s & d & ~mask2).
 *
 * The mask is widened to Uint32 before shifting: a 16-bit mask promotes to
 * (signed) int, and shifting a value with bit 15 set left by 16 overflows
 * int, which is undefined behaviour. Every argument is parenthesised so
 * that expression arguments bind as intended; arguments are evaluated more
 * than once, so they must be free of side effects.
 */
#define BLEND2x16_50(d, s, mask)                                            \
    ((((s) & ((Uint32)(mask) | (Uint32)(mask) << 16)) >> 1) +               \
     (((d) & ((Uint32)(mask) | (Uint32)(mask) << 16)) >> 1) +               \
     ((s) & (d) & ~((Uint32)(mask) | (Uint32)(mask) << 16)))
601 int width = info->
dst_w;
602 int height = info->
dst_h;
629 prev_sw = ((
Uint32 *) srcp)[-1];
635 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
636 s = (prev_sw << 16) + (sw >> 16);
638 s = (prev_sw >> 16) + (sw << 16);
650 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
653 s = (
Uint16) (prev_sw >> 16);
703 unsigned alpha = info->
a;
707 int width = info->
dst_w;
708 int height = info->
dst_h;
715 __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
717 alpha &= ~(1 + 2 + 4);
718 mm_alpha = _mm_set_pi32(0, alpha);
721 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
722 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
725 mm_alpha = _mm_slli_si64(mm_alpha, 3);
728 gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0);
729 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
742 s = (s | s << 16) & 0x07e0f81f;
743 d = (d | d << 16) & 0x07e0f81f;
744 d += (s -
d) * alpha >> 5;
746 *dstp++ = (
Uint16)(d | d >> 16);
755 s = (s | s << 16) & 0x07e0f81f;
756 d = (d | d << 16) & 0x07e0f81f;
757 d += (s -
d) * alpha >> 5;
759 *dstp++ = (
Uint16)(d | d >> 16);
767 s = (s | s << 16) & 0x07e0f81f;
768 d = (d | d << 16) & 0x07e0f81f;
769 d += (s -
d) * alpha >> 5;
771 *dstp++ = (
Uint16)(d | d >> 16);
773 src1 = *(__m64*)srcp;
774 dst1 = *(__m64*)dstp;
778 src2 = _mm_srli_pi16(src2, 11);
781 dst2 = _mm_srli_pi16(dst2, 11);
784 src2 = _mm_sub_pi16(src2, dst2);
785 src2 = _mm_mullo_pi16(src2, mm_alpha);
786 src2 = _mm_srli_pi16(src2, 11);
787 dst2 = _mm_add_pi16(src2, dst2);
788 dst2 = _mm_slli_pi16(dst2, 11);
794 src2 = _mm_and_si64(src2, gmask);
797 dst2 = _mm_and_si64(dst2, gmask);
800 src2 = _mm_sub_pi16(src2, dst2);
801 src2 = _mm_mulhi_pi16(src2, mm_alpha);
802 src2 = _mm_slli_pi16(src2, 5);
803 dst2 = _mm_add_pi16(src2, dst2);
805 mm_res = _mm_or_si64(mm_res, dst2);
809 src2 = _mm_and_si64(src2, bmask);
812 dst2 = _mm_and_si64(dst2, bmask);
815 src2 = _mm_sub_pi16(src2, dst2);
816 src2 = _mm_mullo_pi16(src2, mm_alpha);
817 src2 = _mm_srli_pi16(src2, 11);
818 dst2 = _mm_add_pi16(src2, dst2);
819 dst2 = _mm_and_si64(dst2, bmask);
821 mm_res = _mm_or_si64(mm_res, dst2);
823 *(__m64*)dstp = mm_res;
840 unsigned alpha = info->
a;
844 int width = info->
dst_w;
845 int height = info->
dst_h;
852 __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
854 alpha &= ~(1 + 2 + 4);
855 mm_alpha = _mm_set_pi32(0, alpha);
858 mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha);
859 mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha);
862 mm_alpha = _mm_slli_si64(mm_alpha, 3);
865 rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00);
866 gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0);
867 bmask = _mm_set_pi32(0x001F001F, 0x001F001F);
880 s = (s | s << 16) & 0x03e07c1f;
881 d = (d | d << 16) & 0x03e07c1f;
882 d += (s -
d) * alpha >> 5;
884 *dstp++ = (
Uint16)(d | d >> 16);
893 s = (s | s << 16) & 0x03e07c1f;
894 d = (d | d << 16) & 0x03e07c1f;
895 d += (s -
d) * alpha >> 5;
897 *dstp++ = (
Uint16)(d | d >> 16);
905 s = (s | s << 16) & 0x03e07c1f;
906 d = (d | d << 16) & 0x03e07c1f;
907 d += (s -
d) * alpha >> 5;
909 *dstp++ = (
Uint16)(d | d >> 16);
911 src1 = *(__m64*)srcp;
912 dst1 = *(__m64*)dstp;
916 src2 = _mm_and_si64(src2, rmask);
919 dst2 = _mm_and_si64(dst2, rmask);
922 src2 = _mm_sub_pi16(src2, dst2);
923 src2 = _mm_mulhi_pi16(src2, mm_alpha);
924 src2 = _mm_slli_pi16(src2, 5);
925 dst2 = _mm_add_pi16(src2, dst2);
926 dst2 = _mm_and_si64(dst2, rmask);
932 src2 = _mm_and_si64(src2, gmask);
935 dst2 = _mm_and_si64(dst2, gmask);
938 src2 = _mm_sub_pi16(src2, dst2);
939 src2 = _mm_mulhi_pi16(src2, mm_alpha);
940 src2 = _mm_slli_pi16(src2, 5);
941 dst2 = _mm_add_pi16(src2, dst2);
943 mm_res = _mm_or_si64(mm_res, dst2);
947 src2 = _mm_and_si64(src2, bmask);
950 dst2 = _mm_and_si64(dst2, bmask);
953 src2 = _mm_sub_pi16(src2, dst2);
954 src2 = _mm_mullo_pi16(src2, mm_alpha);
955 src2 = _mm_srli_pi16(src2, 11);
956 dst2 = _mm_add_pi16(src2, dst2);
957 dst2 = _mm_and_si64(dst2, bmask);
959 mm_res = _mm_or_si64(mm_res, dst2);
961 *(__m64*)dstp = mm_res;
980 unsigned alpha = info->
a;
984 int width = info->
dst_w;
985 int height = info->
dst_h;
1002 s = (s | s << 16) & 0x07e0f81f;
1003 d = (d | d << 16) & 0x07e0f81f;
1004 d += (s -
d) * alpha >> 5;
1006 *dstp++ = (
Uint16)(d | d >> 16);
1019 unsigned alpha = info->
a;
1023 int width = info->
dst_w;
1024 int height = info->
dst_h;
1041 s = (s | s << 16) & 0x03e07c1f;
1042 d = (d | d << 16) & 0x03e07c1f;
1043 d += (s -
d) * alpha >> 5;
1045 *dstp++ = (
Uint16)(d | d >> 16);
1058 int width = info->
dst_w;
1059 int height = info->
dst_h;
1069 unsigned alpha = s >> 27;
1076 *dstp = (
Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1083 s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1085 d = (d | d << 16) & 0x07e0f81f;
1086 d += (s -
d) * alpha >> 5;
1088 *dstp = (
Uint16)(d | d >> 16);
1104 int width = info->
dst_w;
1105 int height = info->
dst_h;
1123 *dstp = (
Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1130 s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1132 d = (d | d << 16) & 0x03e07c1f;
1133 d += (s -
d) * alpha >> 5;
1135 *dstp = (
Uint16)(d | d >> 16);
1151 int width = info->
dst_w;
1152 int height = info->
dst_h;
1162 unsigned sR, sG, sB;
1163 unsigned dR, dG, dB, dA;
1164 const unsigned sA = info->
a;
1190 int width = info->
dst_w;
1191 int height = info->
dst_h;
1202 unsigned sR, sG, sB;
1203 unsigned dR, dG, dB, dA;
1204 const unsigned sA = info->
a;
1211 if(sA && Pixel != ckey) {
1231 int width = info->
dst_w;
1232 int height = info->
dst_h;
1242 unsigned sR, sG, sB, sA;
1243 unsigned dR, dG, dB, dA;
1285 && sf->
Gmask == 0xff00
1287 || (sf->
Bmask == 0xff && df->
Bmask == 0x1f))) {
1288 if (df->
Gmask == 0x7e0)
1290 else if (df->
Gmask == 0x3e0)
1299 #if defined(__MMX__) || defined(__3dNOW__)
1306 return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1310 return BlitRGBtoRGBPixelAlphaMMX;
1314 if (sf->
Amask == 0xff000000) {
1327 if (sf->
Amask == 0) {
1335 if (df->
Gmask == 0x7e0) {
1338 return Blit565to565SurfaceAlphaMMX;
1342 }
else if (df->
Gmask == 0x3e0) {
1345 return Blit555to555SurfaceAlphaMMX;
1361 return BlitRGBtoRGBSurfaceAlphaMMX;
1377 if (sf->
Amask == 0) {
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
#define BLEND16_50(d, s, mask)
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
#define SDL_COPY_COLORKEY
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
SDL_PixelFormat * src_fmt
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
A collection of pixels used in software blitting.
DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void)
EGLSurface EGLint EGLint EGLint EGLint height
#define SDL_COPY_RLE_MASK
return Display return Display Bool Bool int d
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
uint32_t Uint32
An unsigned 32-bit integer type.
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
EGLSurface EGLint EGLint EGLint width
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
uint64_t Uint64
An unsigned 64-bit integer type.
#define DUFFS_LOOP4(pixel_copy_increment, width)
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
#define DUFFS_LOOP(pixel_copy_increment, width)
DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void)
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
GLclampf GLclampf GLclampf alpha
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
#define DUFFS_LOOP_124(pixel_copy_increment1,pixel_copy_increment2,pixel_copy_increment4, width)
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
SDL_PixelFormat * dst_fmt
#define SDL_COPY_MODULATE_ALPHA
uint8_t Uint8
An unsigned 8-bit integer type.
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
GLint GLint GLint GLint GLint w
uint16_t Uint16
An unsigned 16-bit integer type.
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
#define BLEND2x16_50(d, s, mask)