|
@@ -6,16 +6,13 @@
|
|
|
|
|
|
#include "SDL_blit.h"
|
|
|
|
|
|
-#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
|
|
|
-__attribute__((target("sse4.1")))
|
|
|
-#endif
|
|
|
/**
|
|
|
* Using the SSE4.1 instruction set, blit four pixels with alpha blending
|
|
|
* @param src A pointer to two 32-bit pixels of ARGB format to blit into dst
|
|
|
* @param dst A pointer to two 32-bit pixels of ARGB format to retain visual data for while alpha blending
|
|
|
* @return A 128-bit wide vector of two alpha-blended pixels in ARGB format
|
|
|
*/
|
|
|
-__m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) {
|
|
|
+__m128i SDL_TARGETING("sse4.1") MixRGBA_SSE4_1(__m128i src, __m128i dst) {
|
|
|
__m128i src_color = _mm_cvtepu8_epi16(src);
|
|
|
__m128i dst_color = _mm_cvtepu8_epi16(dst);
|
|
|
/**
|
|
@@ -36,7 +33,7 @@ __m128i MixRGBA_SSE4_1(__m128i src, __m128i dst) {
|
|
|
return _mm_add_epi8(reduced, dst);
|
|
|
}
|
|
|
|
|
|
-Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
|
|
|
+Uint32 AlignPixelToSDL_PixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
|
|
|
Uint8 a = (color >> srcFormat->Ashift) & 0xFF;
|
|
|
Uint8 r = (color >> srcFormat->Rshift) & 0xFF;
|
|
|
Uint8 g = (color >> srcFormat->Gshift) & 0xFF;
|
|
@@ -45,13 +42,10 @@ Uint32 convertPixelFormat(Uint32 color, const SDL_PixelFormat* srcFormat) {
|
|
|
return (a << 24) | (r << 16) | (g << 8) | b;
|
|
|
}
|
|
|
|
|
|
-#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
|
|
|
-__attribute__((target("sse4.1")))
|
|
|
-#endif
|
|
|
/*
|
|
|
* This helper function converts arbitrary pixel format data into ARGB form with a 4 pixel-wide shuffle
|
|
|
*/
|
|
|
-__m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat) {
|
|
|
+__m128i SDL_TARGETING("sse4.1") AlignPixelToSDL_PixelFormat_x4(__m128i colors, const SDL_PixelFormat* srcFormat) {
|
|
|
// Create shuffle masks based on the source SDL_PixelFormat to ARGB
|
|
|
__m128i srcShuffleMask = _mm_set_epi8(
|
|
|
srcFormat->Ashift / 8 + 12, srcFormat->Rshift / 8 + 12, srcFormat->Gshift / 8 + 12, srcFormat->Bshift / 8 + 12,
|
|
@@ -64,10 +58,7 @@ __m128i convertPixelFormatsx4(__m128i colors, const SDL_PixelFormat* srcFormat)
|
|
|
return _mm_shuffle_epi8(colors, srcShuffleMask);
|
|
|
}
|
|
|
|
|
|
-#if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(__clang__))
|
|
|
-__attribute__((target("sse4.1")))
|
|
|
-#endif
|
|
|
-void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
|
|
|
+void SDL_TARGETING("sse4.1") BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
|
|
|
int width = info->dst_w;
|
|
|
int height = info->dst_h;
|
|
|
Uint8 *src = info->src;
|
|
@@ -83,7 +74,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
|
|
|
/* Process 4-wide chunks of source color data that may be in wrong format into buffer */
|
|
|
for (int i = 0; i < chunks; i += 1) {
|
|
|
__m128i colors = _mm_loadu_si128((__m128i*)(src + i * 16));
|
|
|
- _mm_storeu_si128((__m128i*)(buffer + i * 16), convertPixelFormatsx4(colors, srcfmt));
|
|
|
+ _mm_storeu_si128((__m128i*)(buffer + i * 16), AlignPixelToSDL_PixelFormat_x4(colors, srcfmt));
|
|
|
}
|
|
|
|
|
|
/* Alpha-blend in 2-wide chunks from buffer into destination */
|
|
@@ -102,7 +93,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
|
|
|
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
|
|
|
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
|
|
|
__m128i c_src = _mm_loadu_si64(src_ptr);
|
|
|
- c_src = convertPixelFormatsx4(c_src, srcfmt);
|
|
|
+ c_src = AlignPixelToSDL_PixelFormat_x4(c_src, srcfmt);
|
|
|
__m128i c_dst = _mm_loadu_si64(dst_ptr);
|
|
|
__m128i c_mix = MixRGBA_SSE4_1(c_src, c_dst);
|
|
|
_mm_storeu_si64(dst_ptr, c_mix);
|
|
@@ -112,7 +103,7 @@ void BlitNtoNPixelAlpha_SSE4_1(SDL_BlitInfo* info) {
|
|
|
if (remaining_pixels == 1) {
|
|
|
Uint32 *src_ptr = ((Uint32*)(src + (offset * 4)));
|
|
|
Uint32 *dst_ptr = ((Uint32*)(dst + (offset * 4)));
|
|
|
- Uint32 pixel = convertPixelFormat(*src_ptr, srcfmt);
|
|
|
+ Uint32 pixel = AlignPixelToSDL_PixelFormat(*src_ptr, srcfmt);
|
|
|
/* Old GCC has bad or no _mm_loadu_si32 */
|
|
|
#if defined(__GNUC__) && (__GNUC__ < 11)
|
|
|
__m128i c_src = _mm_set_epi32(0, 0, 0, pixel);
|