zenilib  0.5.3.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
SDL_blit_A.c
Go to the documentation of this file.
1 /*
2  Simple DirectMedia Layer
3  Copyright (C) 1997-2013 Sam Lantinga <slouken@libsdl.org>
4 
5  This software is provided 'as-is', without any express or implied
6  warranty. In no event will the authors be held liable for any damages
7  arising from the use of this software.
8 
9  Permission is granted to anyone to use this software for any purpose,
10  including commercial applications, and to alter it and redistribute it
11  freely, subject to the following restrictions:
12 
13  1. The origin of this software must not be misrepresented; you must not
14  claim that you wrote the original software. If you use this software
15  in a product, an acknowledgment in the product documentation would be
16  appreciated but is not required.
17  2. Altered source versions must be plainly marked as such, and must not be
18  misrepresented as being the original software.
19  3. This notice may not be removed or altered from any source distribution.
20 */
21 #include "SDL_config.h"
22 
23 #include "SDL_video.h"
24 #include "SDL_blit.h"
25 
26 /* Functions to perform alpha blended blitting */
27 
28 /* N->1 blending with per-surface alpha
 *
 * Blends each source pixel into a destination that stores one byte per
 * pixel (the byte indexes dstfmt->palette), using the single alpha value
 * info->a for the whole surface.  The blended colour is reduced to a 3-3-2
 * RGB byte and, when info->table is non-NULL, translated through that table
 * before it is written back.
 *
 * NOTE(review): the embedded listing numbers skip 30 and 49, so the line
 * carrying the function name and parameter list and the line that opens the
 * per-row loop construct closed by "}, width);" are not present in this
 * copy.  Restore them from the pristine file; do not guess.
 */
29 static void
31 {
32  int width = info->dst_w;
33  int height = info->dst_h;
34  Uint8 *src = info->src;
35  int srcskip = info->src_skip;
36  Uint8 *dst = info->dst;
37  int dstskip = info->dst_skip;
38  Uint8 *palmap = info->table;
39  SDL_PixelFormat *srcfmt = info->src_fmt;
40  SDL_PixelFormat *dstfmt = info->dst_fmt;
41  int srcbpp = srcfmt->BytesPerPixel;
42  Uint32 Pixel;
43  unsigned sR, sG, sB;
44  unsigned dR, dG, dB;
45  const unsigned A = info->a;
46 
47  while (height--) {
48  /* *INDENT-OFF* */
50  {
51  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
 /* current destination colour comes from the palette entry *dst selects */
52  dR = dstfmt->palette->colors[*dst].r;
53  dG = dstfmt->palette->colors[*dst].g;
54  dB = dstfmt->palette->colors[*dst].b;
55  ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
 /* keep only the low 8 bits of each blended channel */
56  dR &= 0xff;
57  dG &= 0xff;
58  dB &= 0xff;
59  /* Pack RGB into 8bit pixel */
 /* layout: top 3 bits of R, top 3 bits of G, top 2 bits of B */
60  if ( palmap == NULL ) {
61  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
62  } else {
63  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
64  }
65  dst++;
66  src += srcbpp;
67  },
68  width);
69  /* *INDENT-ON* */
 /* step over the per-row padding of both surfaces */
70  src += srcskip;
71  dst += dstskip;
72  }
73 }
74 
75 /* N->1 blending with pixel alpha
 *
 * Same structure as the per-surface variant, except that the alpha used
 * for each pixel (sA) is extracted from the source pixel itself together
 * with its colour channels.  The destination stores one byte per pixel that
 * indexes dstfmt->palette; the blended colour is packed to a 3-3-2 byte and
 * optionally translated through info->table.
 *
 * NOTE(review): the embedded listing numbers skip 77 and 95, so the line
 * with the function name/parameters and the line opening the per-row loop
 * construct are absent from this copy -- restore them from the pristine
 * file.
 */
76 static void
78 {
79  int width = info->dst_w;
80  int height = info->dst_h;
81  Uint8 *src = info->src;
82  int srcskip = info->src_skip;
83  Uint8 *dst = info->dst;
84  int dstskip = info->dst_skip;
85  Uint8 *palmap = info->table;
86  SDL_PixelFormat *srcfmt = info->src_fmt;
87  SDL_PixelFormat *dstfmt = info->dst_fmt;
88  int srcbpp = srcfmt->BytesPerPixel;
89  Uint32 Pixel;
90  unsigned sR, sG, sB, sA;
91  unsigned dR, dG, dB;
92 
93  while (height--) {
94  /* *INDENT-OFF* */
96  {
97  DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA);
 /* current destination colour comes from the palette entry *dst selects */
98  dR = dstfmt->palette->colors[*dst].r;
99  dG = dstfmt->palette->colors[*dst].g;
100  dB = dstfmt->palette->colors[*dst].b;
101  ALPHA_BLEND_RGB(sR, sG, sB, sA, dR, dG, dB);
 /* keep only the low 8 bits of each blended channel */
102  dR &= 0xff;
103  dG &= 0xff;
104  dB &= 0xff;
105  /* Pack RGB into 8bit pixel */
 /* layout: top 3 bits of R, top 3 bits of G, top 2 bits of B */
106  if ( palmap == NULL ) {
107  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
108  } else {
109  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
110  }
111  dst++;
112  src += srcbpp;
113  },
114  width);
115  /* *INDENT-ON* */
 /* step over the per-row padding of both surfaces */
116  src += srcskip;
117  dst += dstskip;
118  }
119 }
120 
121 /* colorkeyed N->1 blending with per-surface alpha
 *
 * Like the plain per-surface variant, but a source pixel whose raw value
 * equals info->colorkey is skipped: the destination byte is left untouched
 * and only the pointers advance.  All other pixels are blended with the
 * single alpha info->a, packed to a 3-3-2 byte and optionally translated
 * through info->table.
 *
 * NOTE(review): the embedded listing number 123 (function name and
 * parameter list) is absent from this copy -- restore it from the pristine
 * file.
 */
122 static void
124 {
125  int width = info->dst_w;
126  int height = info->dst_h;
127  Uint8 *src = info->src;
128  int srcskip = info->src_skip;
129  Uint8 *dst = info->dst;
130  int dstskip = info->dst_skip;
131  Uint8 *palmap = info->table;
132  SDL_PixelFormat *srcfmt = info->src_fmt;
133  SDL_PixelFormat *dstfmt = info->dst_fmt;
134  int srcbpp = srcfmt->BytesPerPixel;
135  Uint32 ckey = info->colorkey;
136  Uint32 Pixel;
137  unsigned sR, sG, sB;
138  unsigned dR, dG, dB;
139  const unsigned A = info->a;
140 
141  while (height--) {
142  /* *INDENT-OFF* */
143  DUFFS_LOOP(
144  {
145  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
 /* colour-keyed pixels leave the destination as it is */
146  if ( Pixel != ckey ) {
147  dR = dstfmt->palette->colors[*dst].r;
148  dG = dstfmt->palette->colors[*dst].g;
149  dB = dstfmt->palette->colors[*dst].b;
150  ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB);
 /* keep only the low 8 bits of each blended channel */
151  dR &= 0xff;
152  dG &= 0xff;
153  dB &= 0xff;
154  /* Pack RGB into 8bit pixel */
 /* layout: top 3 bits of R, top 3 bits of G, top 2 bits of B */
155  if ( palmap == NULL ) {
156  *dst =((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0));
157  } else {
158  *dst = palmap[((dR>>5)<<(3+2))|((dG>>5)<<(2))|((dB>>6)<<(0))];
159  }
160  }
161  dst++;
162  src += srcbpp;
163  },
164  width);
165  /* *INDENT-ON* */
 /* step over the per-row padding of both surfaces */
166  src += srcskip;
167  dst += dstskip;
168  }
169 }
170 
171 #ifdef __MMX__
172 
173 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case
 *
 * Averages source and destination per colour byte without unpacking:
 *     ((s & 0xfefefe) + (d & 0xfefefe)) >> 1, plus (s & d & 0x010101)
 * to put back the low bit that the masking dropped when both inputs had it
 * set.  The result is ORed with info->dst_fmt->Amask.  An odd row width is
 * handled by blending one pixel with scalar code first; the rest of the row
 * is processed two 32-bit pixels at a time in a 64-bit MMX register.
 * src_skip and dst_skip are converted from bytes to 32-bit pixels (>> 2).
 */
174 static void
175 BlitRGBtoRGBSurfaceAlpha128MMX(SDL_BlitInfo * info)
176 {
177  int width = info->dst_w;
178  int height = info->dst_h;
179  Uint32 *srcp = (Uint32 *) info->src;
180  int srcskip = info->src_skip >> 2;
181  Uint32 *dstp = (Uint32 *) info->dst;
182  int dstskip = info->dst_skip >> 2;
183  Uint32 dalpha = info->dst_fmt->Amask;
184 
185  __m64 src1, src2, dst1, dst2, lmask, hmask, dsta;
186 
187  hmask = _mm_set_pi32(0x00fefefe, 0x00fefefe); /* alpha128 mask -> hmask */
188  lmask = _mm_set_pi32(0x00010101, 0x00010101); /* !alpha128 mask -> lmask */
189  dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
190 
191  while (height--) {
192  int n = width;
 /* odd width: blend a single leading pixel with scalar code */
193  if (n & 1) {
194  Uint32 s = *srcp++;
195  Uint32 d = *dstp;
196  *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
197  + (s & d & 0x00010101)) | dalpha;
198  n--;
199  }
200 
 /* remaining pixels, two per iteration */
201  for (n >>= 1; n > 0; --n) {
202  dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
203  dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
204 
205  src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
206  src2 = src1; /* 2 x src -> src2(ARGBARGB) */
207 
208  dst2 = _mm_and_si64(dst2, hmask); /* dst & mask -> dst2 */
209  src2 = _mm_and_si64(src2, hmask); /* src & mask -> src2 */
210  src2 = _mm_add_pi32(src2, dst2); /* dst2 + src2 -> src2 */
211  src2 = _mm_srli_pi32(src2, 1); /* src2 >> 1 -> src2 */
212 
213  dst1 = _mm_and_si64(dst1, src1); /* src & dst -> dst1 */
214  dst1 = _mm_and_si64(dst1, lmask); /* dst1 & !mask -> dst1 */
215  dst1 = _mm_add_pi32(dst1, src2); /* src2 + dst1 -> dst1 */
216  dst1 = _mm_or_si64(dst1, dsta); /* dsta(full alpha) | dst1 -> dst1 */
217 
218  *(__m64 *) dstp = dst1; /* dst1 -> 2 x dst pixels */
219  dstp += 2;
220  srcp += 2;
221  }
222 
223  srcp += srcskip;
224  dstp += dstskip;
225  }
 /* clear the MMX state once all MMX work is done */
226  _mm_empty();
227 }
228 
229 /* fast RGB888->(A)RGB888 blending with surface alpha */
230 static void
231 BlitRGBtoRGBSurfaceAlphaMMX(SDL_BlitInfo * info)
232 {
233  SDL_PixelFormat *df = info->dst_fmt;
234  Uint32 chanmask;
235  unsigned alpha = info->a;
236 
237  if (alpha == 128 && (df->Rmask | df->Gmask | df->Bmask) == 0x00FFFFFF) {
238  /* only call a128 version when R,G,B occupy lower bits */
239  BlitRGBtoRGBSurfaceAlpha128MMX(info);
240  } else {
241  int width = info->dst_w;
242  int height = info->dst_h;
243  Uint32 *srcp = (Uint32 *) info->src;
244  int srcskip = info->src_skip >> 2;
245  Uint32 *dstp = (Uint32 *) info->dst;
246  int dstskip = info->dst_skip >> 2;
247  Uint32 dalpha = df->Amask;
248  Uint32 amult;
249 
250  __m64 src1, src2, dst1, dst2, mm_alpha, mm_zero, dsta;
251 
252  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
253  /* form the alpha mult */
254  amult = alpha | (alpha << 8);
255  amult = amult | (amult << 16);
256  chanmask =
257  (0xff << df->Rshift) | (0xff << df->
258  Gshift) | (0xff << df->Bshift);
259  mm_alpha = _mm_set_pi32(0, amult & chanmask); /* 0000AAAA -> mm_alpha, minus 1 chan */
260  mm_alpha = _mm_unpacklo_pi8(mm_alpha, mm_zero); /* 0A0A0A0A -> mm_alpha, minus 1 chan */
261  /* at this point mm_alpha can be 000A0A0A or 0A0A0A00 or another combo */
262  dsta = _mm_set_pi32(dalpha, dalpha); /* dst alpha mask -> dsta */
263 
264  while (height--) {
265  int n = width;
266  if (n & 1) {
267  /* One Pixel Blend */
268  src2 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src2 (0000ARGB) */
269  src2 = _mm_unpacklo_pi8(src2, mm_zero); /* 0A0R0G0B -> src2 */
270 
271  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
272  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
273 
274  src2 = _mm_sub_pi16(src2, dst1); /* src2 - dst2 -> src2 */
275  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
276  src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
277  dst1 = _mm_add_pi8(src2, dst1); /* src2 + dst1 -> dst1 */
278 
279  dst1 = _mm_packs_pu16(dst1, mm_zero); /* 0000ARGB -> dst1 */
280  dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
281  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
282 
283  ++srcp;
284  ++dstp;
285 
286  n--;
287  }
288 
289  for (n >>= 1; n > 0; --n) {
290  /* Two Pixels Blend */
291  src1 = *(__m64 *) srcp; /* 2 x src -> src1(ARGBARGB) */
292  src2 = src1; /* 2 x src -> src2(ARGBARGB) */
293  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* low - 0A0R0G0B -> src1 */
294  src2 = _mm_unpackhi_pi8(src2, mm_zero); /* high - 0A0R0G0B -> src2 */
295 
296  dst1 = *(__m64 *) dstp; /* 2 x dst -> dst1(ARGBARGB) */
297  dst2 = dst1; /* 2 x dst -> dst2(ARGBARGB) */
298  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* low - 0A0R0G0B -> dst1 */
299  dst2 = _mm_unpackhi_pi8(dst2, mm_zero); /* high - 0A0R0G0B -> dst2 */
300 
301  src1 = _mm_sub_pi16(src1, dst1); /* src1 - dst1 -> src1 */
302  src1 = _mm_mullo_pi16(src1, mm_alpha); /* src1 * alpha -> src1 */
303  src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1 */
304  dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst1) -> dst1 */
305 
306  src2 = _mm_sub_pi16(src2, dst2); /* src2 - dst2 -> src2 */
307  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
308  src2 = _mm_srli_pi16(src2, 8); /* src2 >> 8 -> src2 */
309  dst2 = _mm_add_pi8(src2, dst2); /* src2 + dst2(dst2) -> dst2 */
310 
311  dst1 = _mm_packs_pu16(dst1, dst2); /* 0A0R0G0B(res1), 0A0R0G0B(res2) -> dst1(ARGBARGB) */
312  dst1 = _mm_or_si64(dst1, dsta); /* dsta | dst1 -> dst1 */
313 
314  *(__m64 *) dstp = dst1; /* dst1 -> 2 x pixel */
315 
316  srcp += 2;
317  dstp += 2;
318  }
319  srcp += srcskip;
320  dstp += dstskip;
321  }
322  _mm_empty();
323  }
324 }
325 
326 /* fast ARGB888->(A)RGB888 blending with pixel alpha */
327 static void
328 BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
329 {
330  int width = info->dst_w;
331  int height = info->dst_h;
332  Uint32 *srcp = (Uint32 *) info->src;
333  int srcskip = info->src_skip >> 2;
334  Uint32 *dstp = (Uint32 *) info->dst;
335  int dstskip = info->dst_skip >> 2;
336  SDL_PixelFormat *sf = info->src_fmt;
337  Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
338  Uint32 amask = sf->Amask;
339  Uint32 ashift = sf->Ashift;
340  Uint64 multmask, multmask2;
341 
342  __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
343 
344  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
345  multmask = 0x00FF;
346  multmask <<= (ashift * 2);
347  multmask2 = 0x00FF00FF00FF00FF;
348 
349  while (height--) {
350  /* *INDENT-OFF* */
351  DUFFS_LOOP4({
352  Uint32 alpha = *srcp & amask;
353  if (alpha == 0) {
354  /* do nothing */
355  } else if (alpha == amask) {
356  *dstp = *srcp;
357  } else {
358  src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
359  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
360 
361  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
362  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
363 
364  mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
365  mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
366  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
367  mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
368  mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */
369  mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */
370 
371  /* blend */
372  src1 = _mm_mullo_pi16(src1, mm_alpha);
373  src1 = _mm_srli_pi16(src1, 8);
374  dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
375  dst1 = _mm_srli_pi16(dst1, 8);
376  dst1 = _mm_add_pi16(src1, dst1);
377  dst1 = _mm_packs_pu16(dst1, mm_zero);
378 
379  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
380  }
381  ++srcp;
382  ++dstp;
383  }, width);
384  /* *INDENT-ON* */
385  srcp += srcskip;
386  dstp += dstskip;
387  }
388  _mm_empty();
389 }
390 
391 #endif /* __MMX__ */
392 
393 /* fast RGB888->(A)RGB888 blending with surface alpha=128 special case
 *
 * Scalar version: averages source and destination per colour byte as
 *     ((s & 0xfefefe) + (d & 0xfefefe)) >> 1, plus (s & d & 0x010101)
 * which restores the low bit lost by the masking when both inputs had it
 * set, and forces the top byte of the result to 0xff.  src_skip and
 * dst_skip are converted from bytes to 32-bit pixels (>> 2).
 *
 * NOTE(review): the embedded listing number 395 (function name and
 * parameter list) is absent from this copy -- restore it from the pristine
 * file.
 */
394 static void
396 {
397  int width = info->dst_w;
398  int height = info->dst_h;
399  Uint32 *srcp = (Uint32 *) info->src;
400  int srcskip = info->src_skip >> 2;
401  Uint32 *dstp = (Uint32 *) info->dst;
402  int dstskip = info->dst_skip >> 2;
403 
404  while (height--) {
405  /* *INDENT-OFF* */
406  DUFFS_LOOP4({
407  Uint32 s = *srcp++;
408  Uint32 d = *dstp;
409  *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)
410  + (s & d & 0x00010101)) | 0xff000000;
411  }, width);
412  /* *INDENT-ON* */
413  srcp += srcskip;
414  dstp += dstskip;
415  }
416 }
417 
418 /* fast RGB888->(A)RGB888 blending with surface alpha
 *
 * For info->a other than 128, each pixel is blended as
 *     d + (((s - d) * alpha) >> 8)
 * in two steps: the bytes selected by 0xff00ff are processed together in
 * one multiply, the byte selected by 0xff00 separately.  The top byte of the
 * stored pixel is forced to 0xff.
 *
 * NOTE(review): the embedded listing numbers skip 420 and 424, so the line
 * with the function name/parameters and the statement inside the
 * alpha == 128 branch are absent from this copy; as shown, that branch is
 * empty.  Restore both lines from the pristine file.
 */
419 static void
421 {
422  unsigned alpha = info->a;
423  if (alpha == 128) {
425  } else {
426  int width = info->dst_w;
427  int height = info->dst_h;
428  Uint32 *srcp = (Uint32 *) info->src;
429  int srcskip = info->src_skip >> 2;
430  Uint32 *dstp = (Uint32 *) info->dst;
431  int dstskip = info->dst_skip >> 2;
432  Uint32 s;
433  Uint32 d;
434  Uint32 s1;
435  Uint32 d1;
436 
437  while (height--) {
438  /* *INDENT-OFF* */
439  DUFFS_LOOP4({
440  s = *srcp;
441  d = *dstp;
 /* bytes 0 and 2 share one multiply; the mask discards cross-byte spill */
442  s1 = s & 0xff00ff;
443  d1 = d & 0xff00ff;
444  d1 = (d1 + ((s1 - d1) * alpha >> 8))
445  & 0xff00ff;
 /* byte 1 on its own */
446  s &= 0xff00;
447  d &= 0xff00;
448  d = (d + ((s - d) * alpha >> 8)) & 0xff00;
449  *dstp = d1 | d | 0xff000000;
450  ++srcp;
451  ++dstp;
452  }, width);
453  /* *INDENT-ON* */
454  srcp += srcskip;
455  dstp += dstskip;
456  }
457  }
458 }
459 
460 /* fast ARGB888->(A)RGB888 blending with pixel alpha
 *
 * The top byte of each source pixel is its alpha.  Alpha 0 leaves the
 * destination untouched, SDL_ALPHA_OPAQUE copies the source pixel verbatim,
 * and every other value blends the colour bytes as
 *     d + (((s - d) * alpha) >> 8)
 * and stores alpha + ((dst_alpha * (alpha ^ 0xFF)) >> 8) in the top byte.
 *
 * NOTE(review): the embedded listing number 462 (function name and
 * parameter list) is absent from this copy -- restore it from the pristine
 * file.
 */
461 static void
463 {
464  int width = info->dst_w;
465  int height = info->dst_h;
466  Uint32 *srcp = (Uint32 *) info->src;
467  int srcskip = info->src_skip >> 2;
468  Uint32 *dstp = (Uint32 *) info->dst;
469  int dstskip = info->dst_skip >> 2;
470 
471  while (height--) {
472  /* *INDENT-OFF* */
473  DUFFS_LOOP4({
474  Uint32 dalpha;
475  Uint32 d;
476  Uint32 s1;
477  Uint32 d1;
478  Uint32 s = *srcp;
479  Uint32 alpha = s >> 24;
480  /* FIXME: Here we special-case opaque alpha since the
481  compositing used (>>8 instead of /255) doesn't handle
482  it correctly. Also special-case alpha=0 for speed?
483  Benchmark this! */
484  if (alpha) {
485  if (alpha == SDL_ALPHA_OPAQUE) {
486  *dstp = *srcp;
487  } else {
488  /*
489  * take out the middle component (green), and process
490  * the other two in parallel. One multiply less.
491  */
492  d = *dstp;
493  dalpha = d >> 24;
494  s1 = s & 0xff00ff;
495  d1 = d & 0xff00ff;
496  d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff;
497  s &= 0xff00;
498  d &= 0xff00;
499  d = (d + ((s - d) * alpha >> 8)) & 0xff00;
 /* alpha ^ 0xFF is 255 - alpha for an 8-bit alpha */
500  dalpha = alpha + (dalpha * (alpha ^ 0xFF) >> 8);
501  *dstp = d1 | d | (dalpha << 24);
502  }
503  }
504  ++srcp;
505  ++dstp;
506  }, width);
507  /* *INDENT-ON* */
508  srcp += srcskip;
509  dstp += dstskip;
510  }
511 }
512 
513 #ifdef __3dNOW__
514 /* fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel alpha */
515 static void
516 BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
517 {
518  int width = info->dst_w;
519  int height = info->dst_h;
520  Uint32 *srcp = (Uint32 *) info->src;
521  int srcskip = info->src_skip >> 2;
522  Uint32 *dstp = (Uint32 *) info->dst;
523  int dstskip = info->dst_skip >> 2;
524  SDL_PixelFormat *sf = info->src_fmt;
525  Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
526  Uint32 amask = sf->Amask;
527  Uint32 ashift = sf->Ashift;
528  Uint64 multmask, multmask2;
529 
530  __m64 src1, dst1, mm_alpha, mm_zero, mm_alpha2;
531 
532  mm_zero = _mm_setzero_si64(); /* 0 -> mm_zero */
533  multmask = 0x00FF;
534  multmask <<= (ashift * 2);
535  multmask2 = 0x00FF00FF00FF00FF;
536 
537  while (height--) {
538  /* *INDENT-OFF* */
539  DUFFS_LOOP4({
540  Uint32 alpha;
541 
542  _m_prefetch(srcp + 16);
543  _m_prefetch(dstp + 16);
544 
545  alpha = *srcp & amask;
546  if (alpha == 0) {
547  /* do nothing */
548  } else if (alpha == amask) {
549  *dstp = *srcp;
550  } else {
551  src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB) */
552  src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */
553 
554  dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB) */
555  dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */
556 
557  mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
558  mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
559  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
560  mm_alpha2 = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha2 */
561  mm_alpha = _mm_or_si64(mm_alpha2, *(__m64 *) & multmask); /* 0F0A0A0A -> mm_alpha */
562  mm_alpha2 = _mm_xor_si64(mm_alpha2, *(__m64 *) & multmask2); /* 255 - mm_alpha -> mm_alpha */
563 
564 
565  /* blend */
566  src1 = _mm_mullo_pi16(src1, mm_alpha);
567  src1 = _mm_srli_pi16(src1, 8);
568  dst1 = _mm_mullo_pi16(dst1, mm_alpha2);
569  dst1 = _mm_srli_pi16(dst1, 8);
570  dst1 = _mm_add_pi16(src1, dst1);
571  dst1 = _mm_packs_pu16(dst1, mm_zero);
572 
573  *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
574  }
575  ++srcp;
576  ++dstp;
577  }, width);
578  /* *INDENT-ON* */
579  srcp += srcskip;
580  dstp += dstskip;
581  }
582  _mm_empty();
583 }
584 
585 #endif /* __3dNOW__ */
586 
587 /* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
588 
/* blend a single 16 bit pixel at 50%
 *
 * d, s: destination and source pixel values.
 * mask: the pixel format's channel bits with the lowest bit of every
 *       channel cleared, so the per-channel sums cannot carry into the
 *       neighbouring channel before the shift.
 * The last term restores the low channel bits when both inputs have them set.
 * Every parameter is parenthesized so expression arguments expand correctly;
 * each parameter is still evaluated more than once.
 */
#define BLEND16_50(d, s, mask) \
    (((((s) & (mask)) + ((d) & (mask))) >> 1) + ((s) & (d) & (~(mask) & 0xffff)))
592 
/* blend two 16 bit pixels at 50%
 *
 * d, s: 32-bit words each holding two 16-bit pixels.
 * mask: the 16-bit channel mask with the lowest bit of every channel
 *       cleared; it is replicated into both halves of the word.
 * The mask is widened to Uint32 before the shift so that a mask with bit 15
 * set is not shifted into the sign bit of an int.  Every parameter is
 * parenthesized so expression arguments expand correctly; each parameter is
 * still evaluated more than once.
 */
#define BLEND2x16_50(d, s, mask) \
    ((((s) & ((mask) | ((Uint32)(mask) << 16))) >> 1) \
     + (((d) & ((mask) | ((Uint32)(mask) << 16))) >> 1) \
     + ((s) & (d) & (~((mask) | ((Uint32)(mask) << 16)))))
597 
/*
 * Blend a 16-bit-per-pixel source over a 16-bit-per-pixel destination at
 * 50%, combining two pixels per 32-bit word wherever possible.
 *
 * Each row takes one of two paths depending on whether the source and
 * destination pointers differ in bit 1 of their address:
 *   - they differ:  destination words are read/written aligned while the
 *                   matching source pixels are assembled from two
 *                   consecutive 32-bit source words (prev_sw and sw);
 *   - they agree:   both sides are accessed as 32-bit words directly.
 * In both paths a leading and/or trailing single pixel is blended with
 * BLEND16_50 and the pairs with BLEND2x16_50.
 *
 * NOTE(review): the embedded listing number 599 (function name and
 * parameter list) is absent from this copy.  The body uses `info` and
 * `mask`, and the visible callers invoke
 * Blit16to16SurfaceAlpha128(info, 0xf7de) / (info, 0xfbde), so this is
 * presumably that function -- restore the line from the pristine file.
 */
598 static void
600 {
601  int width = info->dst_w;
602  int height = info->dst_h;
603  Uint16 *srcp = (Uint16 *) info->src;
604  int srcskip = info->src_skip >> 1;
605  Uint16 *dstp = (Uint16 *) info->dst;
606  int dstskip = info->dst_skip >> 1;
607 
608  while (height--) {
609  if (((uintptr_t) srcp ^ (uintptr_t) dstp) & 2) {
610  /*
611  * Source and destination not aligned, pipeline it.
612  * This is mostly a win for big blits but no loss for
613  * small ones
614  */
615  Uint32 prev_sw;
616  int w = width;
617 
618  /* handle odd destination */
619  if ((uintptr_t) dstp & 2) {
620  Uint16 d = *dstp, s = *srcp;
621  *dstp = BLEND16_50(d, s, mask);
622  dstp++;
623  srcp++;
624  w--;
625  }
626  srcp++; /* srcp is now 32-bit aligned */
627 
628  /* bootstrap pipeline with first halfword */
629  prev_sw = ((Uint32 *) srcp)[-1];
630 
631  while (w > 1) {
632  Uint32 sw, dw, s;
633  sw = *(Uint32 *) srcp;
634  dw = *(Uint32 *) dstp;
 /* join the pending half of prev_sw with the first half of sw */
635 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
636  s = (prev_sw << 16) + (sw >> 16);
637 #else
638  s = (prev_sw >> 16) + (sw << 16);
639 #endif
640  prev_sw = sw;
641  *(Uint32 *) dstp = BLEND2x16_50(dw, s, mask);
642  dstp += 2;
643  srcp += 2;
644  w -= 2;
645  }
646 
647  /* final pixel if any */
648  if (w) {
649  Uint16 d = *dstp, s;
 /* the remaining source pixel is the unused half of prev_sw */
650 #if SDL_BYTEORDER == SDL_BIG_ENDIAN
651  s = (Uint16) prev_sw;
652 #else
653  s = (Uint16) (prev_sw >> 16);
654 #endif
655  *dstp = BLEND16_50(d, s, mask);
656  srcp++;
657  dstp++;
658  }
 /* -1 undoes the extra srcp++ done above to align the source pointer */
659  srcp += srcskip - 1;
660  dstp += dstskip;
661  } else {
662  /* source and destination are aligned */
663  int w = width;
664 
665  /* first odd pixel? */
666  if ((uintptr_t) srcp & 2) {
667  Uint16 d = *dstp, s = *srcp;
668  *dstp = BLEND16_50(d, s, mask);
669  srcp++;
670  dstp++;
671  w--;
672  }
673  /* srcp and dstp are now 32-bit aligned */
674 
675  while (w > 1) {
676  Uint32 sw = *(Uint32 *) srcp;
677  Uint32 dw = *(Uint32 *) dstp;
678  *(Uint32 *) dstp = BLEND2x16_50(dw, sw, mask);
679  srcp += 2;
680  dstp += 2;
681  w -= 2;
682  }
683 
684  /* last odd pixel? */
685  if (w) {
686  Uint16 d = *dstp, s = *srcp;
687  *dstp = BLEND16_50(d, s, mask);
688  srcp++;
689  dstp++;
690  }
691  srcp += srcskip;
692  dstp += dstskip;
693  }
694  }
695 }
696 
697 #ifdef __MMX__
698 
699 /* fast RGB565->RGB565 blending with surface alpha
 *
 * info->a == 128 is delegated to Blit16to16SurfaceAlpha128(info, 0xf7de).
 * Otherwise the low three bits of alpha are cleared and two forms are
 * prepared: mm_alpha holds the value in every 16-bit lane (then shifted
 * left by 3) for the MMX path, and `alpha` itself is reduced to 5 bits for
 * the scalar path.
 *
 * The loop body consists of three brace groups: one pixel (scalar), two
 * pixels (scalar, unrolled) and four pixels (MMX, one 64-bit load/store).
 * The scalar code spreads a pixel over 32 bits with mask 0x07e0f81f so all
 * three channels are blended with one multiply; the MMX code blends red,
 * green and blue separately and ORs the results together.
 *
 * NOTE(review): the embedded listing number 733 is absent, so the line that
 * opens the loop construct closed by "}, width);" is missing from this
 * copy; presumably it is a loop macro taking the three groups as its 1-, 2-
 * and 4-pixel bodies -- restore it from the pristine file.
 */
700 static void
701 Blit565to565SurfaceAlphaMMX(SDL_BlitInfo * info)
702 {
703  unsigned alpha = info->a;
704  if (alpha == 128) {
705  Blit16to16SurfaceAlpha128(info, 0xf7de);
706  } else {
707  int width = info->dst_w;
708  int height = info->dst_h;
709  Uint16 *srcp = (Uint16 *) info->src;
710  int srcskip = info->src_skip >> 1;
711  Uint16 *dstp = (Uint16 *) info->dst;
712  int dstskip = info->dst_skip >> 1;
713  Uint32 s, d;
714 
715  __m64 src1, dst1, src2, dst2, gmask, bmask, mm_res, mm_alpha;
716 
717  alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
718  mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
719  alpha >>= 3; /* downscale alpha to 5 bits */
720 
721  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
722  mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
723  /* position alpha to allow for mullo and mulhi on diff channels
724  to reduce the number of operations */
725  mm_alpha = _mm_slli_si64(mm_alpha, 3);
726 
727  /* Setup the 565 color channel masks */
728  gmask = _mm_set_pi32(0x07E007E0, 0x07E007E0); /* MASKGREEN -> gmask */
729  bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
730 
731  while (height--) {
732  /* *INDENT-OFF* */
 /* first group: one pixel, scalar */
734  {
735  s = *srcp++;
736  d = *dstp;
737  /*
738  * shift out the middle component (green) to
739  * the high 16 bits, and process all three RGB
740  * components at the same time.
741  */
742  s = (s | s << 16) & 0x07e0f81f;
743  d = (d | d << 16) & 0x07e0f81f;
744  d += (s - d) * alpha >> 5;
745  d &= 0x07e0f81f;
746  *dstp++ = (Uint16)(d | d >> 16);
 /* second group: two pixels, scalar */
747  },{
748  s = *srcp++;
749  d = *dstp;
750  /*
751  * shift out the middle component (green) to
752  * the high 16 bits, and process all three RGB
753  * components at the same time.
754  */
755  s = (s | s << 16) & 0x07e0f81f;
756  d = (d | d << 16) & 0x07e0f81f;
757  d += (s - d) * alpha >> 5;
758  d &= 0x07e0f81f;
759  *dstp++ = (Uint16)(d | d >> 16);
760  s = *srcp++;
761  d = *dstp;
762  /*
763  * shift out the middle component (green) to
764  * the high 16 bits, and process all three RGB
765  * components at the same time.
766  */
767  s = (s | s << 16) & 0x07e0f81f;
768  d = (d | d << 16) & 0x07e0f81f;
769  d += (s - d) * alpha >> 5;
770  d &= 0x07e0f81f;
771  *dstp++ = (Uint16)(d | d >> 16);
 /* third group: four pixels, MMX */
772  },{
773  src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
774  dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
775 
776  /* red */
777  src2 = src1;
778  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 [000r 000r 000r 000r] */
779 
780  dst2 = dst1;
781  dst2 = _mm_srli_pi16(dst2, 11); /* dst2 >> 11 -> dst2 [000r 000r 000r 000r] */
782 
783  /* blend */
784  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
785  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
786  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
787  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
788  dst2 = _mm_slli_pi16(dst2, 11); /* dst2 << 11 -> dst2 */
789 
790  mm_res = dst2; /* RED -> mm_res */
791 
792  /* green -- process the bits in place */
793  src2 = src1;
794  src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
795 
796  dst2 = dst1;
797  dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
798 
799  /* blend */
800  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
801  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
802  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
803  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
804 
805  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
806 
807  /* blue */
808  src2 = src1;
809  src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
810 
811  dst2 = dst1;
812  dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
813 
814  /* blend */
815  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
816  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
817  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
818  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
819  dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
820 
821  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
822 
823  *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
824 
825  srcp += 4;
826  dstp += 4;
827  }, width);
828  /* *INDENT-ON* */
829  srcp += srcskip;
830  dstp += dstskip;
831  }
 /* clear the MMX state once all MMX work is done */
832  _mm_empty();
833  }
834 }
835 
836 /* fast RGB555->RGB555 blending with surface alpha
 *
 * info->a == 128 is delegated to Blit16to16SurfaceAlpha128(info, 0xfbde).
 * Otherwise the low three bits of alpha are cleared and two forms are
 * prepared: mm_alpha holds the value in every 16-bit lane (then shifted
 * left by 3) for the MMX path, and `alpha` itself is reduced to 5 bits for
 * the scalar path.
 *
 * The loop body consists of three brace groups: one pixel (scalar), two
 * pixels (scalar, unrolled) and four pixels (MMX, one 64-bit load/store).
 * The scalar code spreads a pixel over 32 bits with mask 0x03e07c1f so all
 * three channels are blended with one multiply; the MMX code blends red,
 * green and blue separately and ORs the results together.
 *
 * NOTE(review): the embedded listing number 871 is absent, so the line that
 * opens the loop construct closed by "}, width);" is missing from this
 * copy; presumably it is a loop macro taking the three groups as its 1-, 2-
 * and 4-pixel bodies -- restore it from the pristine file.
 */
837 static void
838 Blit555to555SurfaceAlphaMMX(SDL_BlitInfo * info)
839 {
840  unsigned alpha = info->a;
841  if (alpha == 128) {
842  Blit16to16SurfaceAlpha128(info, 0xfbde);
843  } else {
844  int width = info->dst_w;
845  int height = info->dst_h;
846  Uint16 *srcp = (Uint16 *) info->src;
847  int srcskip = info->src_skip >> 1;
848  Uint16 *dstp = (Uint16 *) info->dst;
849  int dstskip = info->dst_skip >> 1;
850  Uint32 s, d;
851 
852  __m64 src1, dst1, src2, dst2, rmask, gmask, bmask, mm_res, mm_alpha;
853 
854  alpha &= ~(1 + 2 + 4); /* cut alpha to get the exact same behaviour */
855  mm_alpha = _mm_set_pi32(0, alpha); /* 0000000A -> mm_alpha */
856  alpha >>= 3; /* downscale alpha to 5 bits */
857 
858  mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
859  mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
860  /* position alpha to allow for mullo and mulhi on diff channels
861  to reduce the number of operations */
862  mm_alpha = _mm_slli_si64(mm_alpha, 3);
863 
864  /* Setup the 555 color channel masks */
865  rmask = _mm_set_pi32(0x7C007C00, 0x7C007C00); /* MASKRED -> rmask */
866  gmask = _mm_set_pi32(0x03E003E0, 0x03E003E0); /* MASKGREEN -> gmask */
867  bmask = _mm_set_pi32(0x001F001F, 0x001F001F); /* MASKBLUE -> bmask */
868 
869  while (height--) {
870  /* *INDENT-OFF* */
 /* first group: one pixel, scalar */
872  {
873  s = *srcp++;
874  d = *dstp;
875  /*
876  * shift out the middle component (green) to
877  * the high 16 bits, and process all three RGB
878  * components at the same time.
879  */
880  s = (s | s << 16) & 0x03e07c1f;
881  d = (d | d << 16) & 0x03e07c1f;
882  d += (s - d) * alpha >> 5;
883  d &= 0x03e07c1f;
884  *dstp++ = (Uint16)(d | d >> 16);
 /* second group: two pixels, scalar */
885  },{
886  s = *srcp++;
887  d = *dstp;
888  /*
889  * shift out the middle component (green) to
890  * the high 16 bits, and process all three RGB
891  * components at the same time.
892  */
893  s = (s | s << 16) & 0x03e07c1f;
894  d = (d | d << 16) & 0x03e07c1f;
895  d += (s - d) * alpha >> 5;
896  d &= 0x03e07c1f;
897  *dstp++ = (Uint16)(d | d >> 16);
898  s = *srcp++;
899  d = *dstp;
900  /*
901  * shift out the middle component (green) to
902  * the high 16 bits, and process all three RGB
903  * components at the same time.
904  */
905  s = (s | s << 16) & 0x03e07c1f;
906  d = (d | d << 16) & 0x03e07c1f;
907  d += (s - d) * alpha >> 5;
908  d &= 0x03e07c1f;
909  *dstp++ = (Uint16)(d | d >> 16);
 /* third group: four pixels, MMX */
910  },{
911  src1 = *(__m64*)srcp; /* 4 src pixels -> src1 */
912  dst1 = *(__m64*)dstp; /* 4 dst pixels -> dst1 */
913 
914  /* red -- process the bits in place */
915  src2 = src1;
916  src2 = _mm_and_si64(src2, rmask); /* src & MASKRED -> src2 */
917 
918  dst2 = dst1;
919  dst2 = _mm_and_si64(dst2, rmask); /* dst & MASKRED -> dst2 */
920 
921  /* blend */
922  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
923  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
924  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
925  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
926  dst2 = _mm_and_si64(dst2, rmask); /* dst2 & MASKRED -> dst2 */
927 
928  mm_res = dst2; /* RED -> mm_res */
929 
930  /* green -- process the bits in place */
931  src2 = src1;
932  src2 = _mm_and_si64(src2, gmask); /* src & MASKGREEN -> src2 */
933 
934  dst2 = dst1;
935  dst2 = _mm_and_si64(dst2, gmask); /* dst & MASKGREEN -> dst2 */
936 
937  /* blend */
938  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
939  src2 = _mm_mulhi_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
940  src2 = _mm_slli_pi16(src2, 5); /* src2 << 5 -> src2 */
941  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
942 
943  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN -> mm_res */
944 
945  /* blue */
946  src2 = src1; /* src -> src2 */
947  src2 = _mm_and_si64(src2, bmask); /* src & MASKBLUE -> src2[000b 000b 000b 000b] */
948 
949  dst2 = dst1; /* dst -> dst2 */
950  dst2 = _mm_and_si64(dst2, bmask); /* dst & MASKBLUE -> dst2[000b 000b 000b 000b] */
951 
952  /* blend */
953  src2 = _mm_sub_pi16(src2, dst2);/* src - dst -> src2 */
954  src2 = _mm_mullo_pi16(src2, mm_alpha); /* src2 * alpha -> src2 */
955  src2 = _mm_srli_pi16(src2, 11); /* src2 >> 11 -> src2 */
956  dst2 = _mm_add_pi16(src2, dst2); /* src2 + dst2 -> dst2 */
957  dst2 = _mm_and_si64(dst2, bmask); /* dst2 & MASKBLUE -> dst2 */
958 
959  mm_res = _mm_or_si64(mm_res, dst2); /* RED | GREEN | BLUE -> mm_res */
960 
961  *(__m64*)dstp = mm_res; /* mm_res -> 4 dst pixels */
962 
963  srcp += 4;
964  dstp += 4;
965  }, width);
966  /* *INDENT-ON* */
967  srcp += srcskip;
968  dstp += dstskip;
969  }
 /* clear the MMX state once all MMX work is done */
970  _mm_empty();
971  }
972 }
973 
974 #endif /* __MMX__ */
975 
976 /* fast RGB565->RGB565 blending with surface alpha
 *
 * Scalar version.  info->a == 128 is delegated to
 * Blit16to16SurfaceAlpha128(info, 0xf7de); any other value is reduced to
 * 5 bits and applied to all three channels of a pixel with one multiply by
 * spreading the pixel over 32 bits with mask 0x07e0f81f.
 *
 * NOTE(review): the embedded listing number 978 (function name and
 * parameter list) is absent from this copy -- restore it from the pristine
 * file.
 */
977 static void
979 {
980  unsigned alpha = info->a;
981  if (alpha == 128) {
982  Blit16to16SurfaceAlpha128(info, 0xf7de);
983  } else {
984  int width = info->dst_w;
985  int height = info->dst_h;
986  Uint16 *srcp = (Uint16 *) info->src;
987  int srcskip = info->src_skip >> 1;
988  Uint16 *dstp = (Uint16 *) info->dst;
989  int dstskip = info->dst_skip >> 1;
990  alpha >>= 3; /* downscale alpha to 5 bits */
991 
992  while (height--) {
993  /* *INDENT-OFF* */
994  DUFFS_LOOP4({
995  Uint32 s = *srcp++;
996  Uint32 d = *dstp;
997  /*
998  * shift out the middle component (green) to
999  * the high 16 bits, and process all three RGB
1000  * components at the same time.
1001  */
1002  s = (s | s << 16) & 0x07e0f81f;
1003  d = (d | d << 16) & 0x07e0f81f;
1004  d += (s - d) * alpha >> 5;
1005  d &= 0x07e0f81f;
 /* fold the high half (green) back onto the low 16 bits */
1006  *dstp++ = (Uint16)(d | d >> 16);
1007  }, width);
1008  /* *INDENT-ON* */
1009  srcp += srcskip;
1010  dstp += dstskip;
1011  }
1012  }
1013 }
1014 
1015 /* fast RGB555->RGB555 blending with surface alpha
 *
 * Scalar version.  info->a == 128 is delegated to
 * Blit16to16SurfaceAlpha128(info, 0xfbde); any other value is reduced to
 * 5 bits and applied to all three channels of a pixel with one multiply by
 * spreading the pixel over 32 bits with mask 0x03e07c1f.
 *
 * NOTE(review): the embedded listing number 1017 (function name and
 * parameter list) is absent from this copy -- restore it from the pristine
 * file.
 */
1016 static void
1018 {
1019  unsigned alpha = info->a; /* full per-surface alpha; reduced to 5 bits in the else branch */
1020  if (alpha == 128) {
1021  Blit16to16SurfaceAlpha128(info, 0xfbde);
1022  } else {
1023  int width = info->dst_w;
1024  int height = info->dst_h;
1025  Uint16 *srcp = (Uint16 *) info->src;
1026  int srcskip = info->src_skip >> 1;
1027  Uint16 *dstp = (Uint16 *) info->dst;
1028  int dstskip = info->dst_skip >> 1;
1029  alpha >>= 3; /* downscale alpha to 5 bits */
1030 
1031  while (height--) {
1032  /* *INDENT-OFF* */
1033  DUFFS_LOOP4({
1034  Uint32 s = *srcp++;
1035  Uint32 d = *dstp;
1036  /*
1037  * shift out the middle component (green) to
1038  * the high 16 bits, and process all three RGB
1039  * components at the same time.
1040  */
1041  s = (s | s << 16) & 0x03e07c1f;
1042  d = (d | d << 16) & 0x03e07c1f;
1043  d += (s - d) * alpha >> 5;
1044  d &= 0x03e07c1f;
 /* fold the high half (green) back onto the low 16 bits */
1045  *dstp++ = (Uint16)(d | d >> 16);
1046  }, width);
1047  /* *INDENT-ON* */
1048  srcp += srcskip;
1049  dstp += dstskip;
1050  }
1051  }
1052 }
1053 
1054 /* fast ARGB8888->RGB565 blending with pixel alpha */
1055 static void
1057 {
1058  int width = info->dst_w;
1059  int height = info->dst_h;
1060  Uint32 *srcp = (Uint32 *) info->src;
1061  int srcskip = info->src_skip >> 2;
1062  Uint16 *dstp = (Uint16 *) info->dst;
1063  int dstskip = info->dst_skip >> 1;
1064 
1065  while (height--) {
1066  /* *INDENT-OFF* */
1067  DUFFS_LOOP4({
1068  Uint32 s = *srcp;
1069  unsigned alpha = s >> 27; /* downscale alpha to 5 bits */
1070  /* FIXME: Here we special-case opaque alpha since the
1071  compositioning used (>>8 instead of /255) doesn't handle
1072  it correctly. Also special-case alpha=0 for speed?
1073  Benchmark this! */
1074  if(alpha) {
1075  if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1076  *dstp = (Uint16)((s >> 8 & 0xf800) + (s >> 5 & 0x7e0) + (s >> 3 & 0x1f));
1077  } else {
1078  Uint32 d = *dstp;
1079  /*
1080  * convert source and destination to G0RAB65565
1081  * and blend all components at the same time
1082  */
1083  s = ((s & 0xfc00) << 11) + (s >> 8 & 0xf800)
1084  + (s >> 3 & 0x1f);
1085  d = (d | d << 16) & 0x07e0f81f;
1086  d += (s - d) * alpha >> 5;
1087  d &= 0x07e0f81f;
1088  *dstp = (Uint16)(d | d >> 16);
1089  }
1090  }
1091  srcp++;
1092  dstp++;
1093  }, width);
1094  /* *INDENT-ON* */
1095  srcp += srcskip;
1096  dstp += dstskip;
1097  }
1098 }
1099 
1100 /* fast ARGB8888->RGB555 blending with pixel alpha */
1101 static void
1103 {
1104  int width = info->dst_w;
1105  int height = info->dst_h;
1106  Uint32 *srcp = (Uint32 *) info->src;
1107  int srcskip = info->src_skip >> 2;
1108  Uint16 *dstp = (Uint16 *) info->dst;
1109  int dstskip = info->dst_skip >> 1;
1110 
1111  while (height--) {
1112  /* *INDENT-OFF* */
1113  DUFFS_LOOP4({
1114  unsigned alpha;
1115  Uint32 s = *srcp;
1116  alpha = s >> 27; /* downscale alpha to 5 bits */
1117  /* FIXME: Here we special-case opaque alpha since the
1118  compositioning used (>>8 instead of /255) doesn't handle
1119  it correctly. Also special-case alpha=0 for speed?
1120  Benchmark this! */
1121  if(alpha) {
1122  if(alpha == (SDL_ALPHA_OPAQUE >> 3)) {
1123  *dstp = (Uint16)((s >> 9 & 0x7c00) + (s >> 6 & 0x3e0) + (s >> 3 & 0x1f));
1124  } else {
1125  Uint32 d = *dstp;
1126  /*
1127  * convert source and destination to G0RAB65565
1128  * and blend all components at the same time
1129  */
1130  s = ((s & 0xf800) << 10) + (s >> 9 & 0x7c00)
1131  + (s >> 3 & 0x1f);
1132  d = (d | d << 16) & 0x03e07c1f;
1133  d += (s - d) * alpha >> 5;
1134  d &= 0x03e07c1f;
1135  *dstp = (Uint16)(d | d >> 16);
1136  }
1137  }
1138  srcp++;
1139  dstp++;
1140  }, width);
1141  /* *INDENT-ON* */
1142  srcp += srcskip;
1143  dstp += dstskip;
1144  }
1145 }
1146 
1147 /* General (slow) N->N blending with per-surface alpha */
1148 static void
1150 {
1151  int width = info->dst_w;
1152  int height = info->dst_h;
1153  Uint8 *src = info->src;
1154  int srcskip = info->src_skip;
1155  Uint8 *dst = info->dst;
1156  int dstskip = info->dst_skip;
1157  SDL_PixelFormat *srcfmt = info->src_fmt;
1158  SDL_PixelFormat *dstfmt = info->dst_fmt;
1159  int srcbpp = srcfmt->BytesPerPixel;
1160  int dstbpp = dstfmt->BytesPerPixel;
1161  Uint32 Pixel;
1162  unsigned sR, sG, sB;
1163  unsigned dR, dG, dB, dA;
1164  const unsigned sA = info->a;
1165 
1166  if (sA) {
1167  while (height--) {
1168  /* *INDENT-OFF* */
1169  DUFFS_LOOP4(
1170  {
1171  DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
1172  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1173  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1174  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1175  src += srcbpp;
1176  dst += dstbpp;
1177  },
1178  width);
1179  /* *INDENT-ON* */
1180  src += srcskip;
1181  dst += dstskip;
1182  }
1183  }
1184 }
1185 
1186 /* General (slow) colorkeyed N->N blending with per-surface alpha */
1187 static void
1189 {
1190  int width = info->dst_w;
1191  int height = info->dst_h;
1192  Uint8 *src = info->src;
1193  int srcskip = info->src_skip;
1194  Uint8 *dst = info->dst;
1195  int dstskip = info->dst_skip;
1196  SDL_PixelFormat *srcfmt = info->src_fmt;
1197  SDL_PixelFormat *dstfmt = info->dst_fmt;
1198  Uint32 ckey = info->colorkey;
1199  int srcbpp = srcfmt->BytesPerPixel;
1200  int dstbpp = dstfmt->BytesPerPixel;
1201  Uint32 Pixel;
1202  unsigned sR, sG, sB;
1203  unsigned dR, dG, dB, dA;
1204  const unsigned sA = info->a;
1205 
1206  while (height--) {
1207  /* *INDENT-OFF* */
1208  DUFFS_LOOP4(
1209  {
1210  RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
1211  if(sA && Pixel != ckey) {
1212  RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
1213  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1214  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1215  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1216  }
1217  src += srcbpp;
1218  dst += dstbpp;
1219  },
1220  width);
1221  /* *INDENT-ON* */
1222  src += srcskip;
1223  dst += dstskip;
1224  }
1225 }
1226 
1227 /* General (slow) N->N blending with pixel alpha */
1228 static void
1230 {
1231  int width = info->dst_w;
1232  int height = info->dst_h;
1233  Uint8 *src = info->src;
1234  int srcskip = info->src_skip;
1235  Uint8 *dst = info->dst;
1236  int dstskip = info->dst_skip;
1237  SDL_PixelFormat *srcfmt = info->src_fmt;
1238  SDL_PixelFormat *dstfmt = info->dst_fmt;
1239  int srcbpp;
1240  int dstbpp;
1241  Uint32 Pixel;
1242  unsigned sR, sG, sB, sA;
1243  unsigned dR, dG, dB, dA;
1244 
1245  /* Set up some basic variables */
1246  srcbpp = srcfmt->BytesPerPixel;
1247  dstbpp = dstfmt->BytesPerPixel;
1248 
1249  while (height--) {
1250  /* *INDENT-OFF* */
1251  DUFFS_LOOP4(
1252  {
1253  DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
1254  if(sA) {
1255  DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
1256  ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
1257  ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
1258  }
1259  src += srcbpp;
1260  dst += dstbpp;
1261  },
1262  width);
1263  /* *INDENT-ON* */
1264  src += srcskip;
1265  dst += dstskip;
1266  }
1267 }
1268 
1269 
1270 SDL_BlitFunc
1272 {
1273  SDL_PixelFormat *sf = surface->format;
1274  SDL_PixelFormat *df = surface->map->dst->format;
1275 
1276  switch (surface->map->info.flags & ~SDL_COPY_RLE_MASK) {
1277  case SDL_COPY_BLEND:
1278  /* Per-pixel alpha blits */
1279  switch (df->BytesPerPixel) {
1280  case 1:
1281  return BlitNto1PixelAlpha;
1282 
1283  case 2:
1284  if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
1285  && sf->Gmask == 0xff00
1286  && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
1287  || (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
1288  if (df->Gmask == 0x7e0)
1289  return BlitARGBto565PixelAlpha;
1290  else if (df->Gmask == 0x3e0)
1291  return BlitARGBto555PixelAlpha;
1292  }
1293  return BlitNtoNPixelAlpha;
1294 
1295  case 4:
1296  if (sf->Rmask == df->Rmask
1297  && sf->Gmask == df->Gmask
1298  && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1299 #if defined(__MMX__) || defined(__3dNOW__)
1300  if (sf->Rshift % 8 == 0
1301  && sf->Gshift % 8 == 0
1302  && sf->Bshift % 8 == 0
1303  && sf->Ashift % 8 == 0 && sf->Aloss == 0) {
1304 #ifdef __3dNOW__
1305  if (SDL_Has3DNow())
1306  return BlitRGBtoRGBPixelAlphaMMX3DNOW;
1307 #endif
1308 #ifdef __MMX__
1309  if (SDL_HasMMX())
1310  return BlitRGBtoRGBPixelAlphaMMX;
1311 #endif
1312  }
1313 #endif /* __MMX__ || __3dNOW__ */
1314  if (sf->Amask == 0xff000000) {
1315  return BlitRGBtoRGBPixelAlpha;
1316  }
1317  }
1318  return BlitNtoNPixelAlpha;
1319 
1320  case 3:
1321  default:
1322  return BlitNtoNPixelAlpha;
1323  }
1324  break;
1325 
1327  if (sf->Amask == 0) {
1328  /* Per-surface alpha blits */
1329  switch (df->BytesPerPixel) {
1330  case 1:
1331  return BlitNto1SurfaceAlpha;
1332 
1333  case 2:
1334  if (surface->map->identity) {
1335  if (df->Gmask == 0x7e0) {
1336 #ifdef __MMX__
1337  if (SDL_HasMMX())
1338  return Blit565to565SurfaceAlphaMMX;
1339  else
1340 #endif
1341  return Blit565to565SurfaceAlpha;
1342  } else if (df->Gmask == 0x3e0) {
1343 #ifdef __MMX__
1344  if (SDL_HasMMX())
1345  return Blit555to555SurfaceAlphaMMX;
1346  else
1347 #endif
1348  return Blit555to555SurfaceAlpha;
1349  }
1350  }
1351  return BlitNtoNSurfaceAlpha;
1352 
1353  case 4:
1354  if (sf->Rmask == df->Rmask
1355  && sf->Gmask == df->Gmask
1356  && sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
1357 #ifdef __MMX__
1358  if (sf->Rshift % 8 == 0
1359  && sf->Gshift % 8 == 0
1360  && sf->Bshift % 8 == 0 && SDL_HasMMX())
1361  return BlitRGBtoRGBSurfaceAlphaMMX;
1362 #endif
1363  if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
1364  return BlitRGBtoRGBSurfaceAlpha;
1365  }
1366  }
1367  return BlitNtoNSurfaceAlpha;
1368 
1369  case 3:
1370  default:
1371  return BlitNtoNSurfaceAlpha;
1372  }
1373  }
1374  break;
1375 
1377  if (sf->Amask == 0) {
1378  if (df->BytesPerPixel == 1) {
1379  return BlitNto1SurfaceAlphaKey;
1380  } else {
1381  return BlitNtoNSurfaceAlphaKey;
1382  }
1383  }
1384  break;
1385  }
1386 
1387  return NULL;
1388 }
1389 
1390 /* vi: set ts=4 sw=4 expandtab: */
Uint8 * table
Definition: SDL_blit.h:67
static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1056
#define BLEND16_50(d, s, mask)
Definition: SDL_blit_A.c:590
static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:30
GLdouble s
Definition: glew.h:1376
static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1188
#define SDL_COPY_COLORKEY
Definition: SDL_blit.h:39
int src_skip
Definition: SDL_blit.h:60
#define RETRIEVE_RGB_PIXEL(buf, bpp, Pixel)
Definition: SDL_blit.h:145
Uint8 g
Definition: SDL_pixels.h:255
#define NULL
Definition: ftobjs.h:61
#define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA)
Definition: SDL_blit.h:453
Uint8 BytesPerPixel
Definition: SDL_pixels.h:277
GLclampd n
Definition: glew.h:7287
#define ALPHA_BLEND_RGB(sR, sG, sB, A, dR, dG, dB)
Definition: SDL_blit.h:444
static void BlitNto1SurfaceAlphaKey(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:123
SDL_PixelFormat * src_fmt
Definition: SDL_blit.h:65
EGLSurface surface
Definition: eglext.h:74
#define ASSEMBLE_RGBA(buf, bpp, fmt, r, g, b, a)
Definition: SDL_blit.h:401
A collection of pixels used in software blitting.
Definition: SDL_surface.h:69
DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void)
Definition: SDL_cpuinfo.c:548
EGLSurface EGLint EGLint EGLint EGLint height
Definition: eglext.h:293
#define SDL_COPY_RLE_MASK
Definition: SDL_blit.h:44
Uint8 b
Definition: SDL_pixels.h:256
int dst_skip
Definition: SDL_blit.h:64
return Display return Display Bool Bool int d
Definition: SDL_x11sym.h:30
static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1102
static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1229
Uint32 colorkey
Definition: SDL_blit.h:69
uint32_t Uint32
An unsigned 32-bit integer type.
Definition: SDL_stdinc.h:145
Uint8 * dst
Definition: SDL_blit.h:61
struct SDL_BlitMap * map
Definition: SDL_surface.h:88
static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1017
#define DISEMBLE_RGBA(buf, bpp, fmt, Pixel, r, g, b, a)
Definition: SDL_blit.h:352
Uint8 r
Definition: SDL_pixels.h:254
GLenum GLenum dst
Definition: glew.h:2396
static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:395
EGLSurface EGLint EGLint EGLint width
Definition: eglext.h:293
#define RGB_FROM_PIXEL(Pixel, fmt, r, g, b)
Definition: SDL_blit.h:121
static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:978
uint64_t Uint64
An unsigned 64-bit integer type.
Definition: SDL_stdinc.h:154
#define DUFFS_LOOP4(pixel_copy_increment, width)
Definition: SDL_blit.h:487
static void BlitRGBtoRGBPixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:462
#define DUFFS_LOOP(pixel_copy_increment, width)
Definition: SDL_blit.h:499
DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void)
Definition: SDL_cpuinfo.c:557
static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:420
unsigned int uintptr_t
GLclampf GLclampf GLclampf alpha
Definition: glew.h:1506
Uint8 * src
Definition: SDL_blit.h:57
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
Definition: glew.h:11582
#define DUFFS_LOOP_124(pixel_copy_increment1,pixel_copy_increment2,pixel_copy_increment4, width)
Definition: SDL_blit.h:503
static void Blit16to16SurfaceAlpha128(SDL_BlitInfo *info, Uint16 mask)
Definition: SDL_blit_A.c:599
SDL_PixelFormat * dst_fmt
Definition: SDL_blit.h:66
SDL_Surface * dst
Definition: SDL_blit.h:87
SDL_Color * colors
Definition: SDL_pixels.h:264
SDL_PixelFormat * format
Definition: SDL_surface.h:72
#define SDL_COPY_MODULATE_ALPHA
Definition: SDL_blit.h:35
uint8_t Uint8
An unsigned 8-bit integer type.
Definition: SDL_stdinc.h:129
static void BlitNto1PixelAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:77
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
Definition: gl2ext.h:961
static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info)
Definition: SDL_blit_A.c:1149
#define DISEMBLE_RGB(buf, bpp, fmt, Pixel, r, g, b)
Definition: SDL_blit.h:176
GLint GLint GLint GLint GLint w
Definition: gl2ext.h:1215
uint16_t Uint16
An unsigned 16-bit integer type.
Definition: SDL_stdinc.h:137
SDL_Palette * palette
Definition: SDL_pixels.h:275
GLenum src
Definition: glew.h:2396
SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface)
Definition: SDL_blit_A.c:1271
#define SDL_ALPHA_OPAQUE
Definition: SDL_pixels.h:43
int identity
Definition: SDL_blit.h:88
#define BLEND2x16_50(d, s, mask)
Definition: SDL_blit_A.c:594
#define SDL_COPY_BLEND
Definition: SDL_blit.h:36
SDL_BlitInfo info
Definition: SDL_blit.h:91
Uint8 a
Definition: SDL_blit.h:70