Browse Source

Implement accurate, performant 32-bit scalar blitter for ARGB dst case

Isaac Aronson 1 year ago
parent
commit
5cec91e27a
2 changed files with 58 additions and 16 deletions
  1. 29 1
      src/video/SDL_blit.h
  2. 29 15
      src/video/SDL_blit_A.c

+ 29 - 1
src/video/SDL_blit.h

@@ -493,6 +493,13 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
         }                                             \
     }
 
+/* Convert a 32-bit (4 bytes per pixel) pixel of format srcfmt to packed ARGB:
+ * alpha in bits 31-24, red in 23-16, green in 15-8, blue in 7-0.
+ * src is the pixel value; dst must be an assignable 32-bit lvalue. */
+#define PIXEL_TO_ARGB_PIXEL(src, srcfmt, dst)                                         \
+    do {                                                                              \
+        Uint8 a, r, g, b;                                                             \
+        RGBA_FROM_PIXEL(src, srcfmt, r, g, b, a);                                     \
+        /* Widen before shifting: a Uint8 operand is promoted to (signed) int, */     \
+        /* so a << 24 would overflow it whenever the top bit of alpha is set.  */     \
+        (dst) = ((Uint32)a << 24) | ((Uint32)r << 16) | ((Uint32)g << 8) | (Uint32)b; \
+    } while (0)
 /* Blend a single color channel or alpha value */
 #define ALPHA_BLEND_CHANNEL(sC, dC, sA)                  \
     do {                                                 \
@@ -509,7 +516,28 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface);
         ALPHA_BLEND_CHANNEL(sG, dG, A);                       \
         ALPHA_BLEND_CHANNEL(sB, dB, A);                       \
     } while (0)
-
+/* Blend 32-bit ARGB pixel src over 32-bit ARGB pixel dst, storing the result in dst.
+ * Each colour channel becomes (src * A + dst * (255 - A)) / 255, where A is the top
+ * byte of src. The alpha channel is blended the same way with the source alpha
+ * treated as 0xFF. src is only read; dst must be an assignable 32-bit lvalue. */
+#define ALPHA_BLEND_ARGB_PIXELS(src, dst)                               \
+    do {                                                                \
+        /* Evaluate each argument exactly once; src is left untouched */\
+        const Uint32 argbSrc_ = (Uint32)(src);                          \
+        const Uint32 argbDst_ = (Uint32)(dst);                          \
+        const Uint32 srcA = argbSrc_ >> 24;                             \
+                                                                        \
+        /* Two channels per word, each in its own 16-bit lane */        \
+        const Uint32 srcRB = argbSrc_ & 0x00FF00FF;                     \
+        const Uint32 dstRB = argbDst_ & 0x00FF00FF;                     \
+                                                                        \
+        /* Source alpha lane is forced to 0xFF before blending */       \
+        const Uint32 srcGA = ((argbSrc_ | 0xFF000000) >> 8) & 0x00FF00FF; \
+        const Uint32 dstGA = (argbDst_ >> 8) & 0x00FF00FF;              \
+        Uint32 resRB, resGA;                                            \
+                                                                        \
+        /* src * A + dst * 255 - dst * A, relying on unsigned wraparound */ \
+        resRB = ((srcRB - dstRB) * srcA) + (dstRB << 8) - dstRB;        \
+        /* Divide each lane by 255 using adds and shifts only */        \
+        resRB += 0x00010001;                                            \
+        resRB += (resRB >> 8) & 0x00FF00FF;                             \
+        resRB = (resRB >> 8) & 0x00FF00FF;                              \
+        resGA = ((srcGA - dstGA) * srcA) + (dstGA << 8) - dstGA;        \
+        resGA += 0x00010001;                                            \
+        resGA += (resGA >> 8) & 0x00FF00FF;                             \
+        /* G and A are already in their final byte positions */         \
+        resGA &= 0xFF00FF00;                                            \
+        (dst) = resRB | resGA;                                          \
+    } while (0)
 /* Blend the RGBA values of two pixels */
 #define ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA) \
     do {                                                 \

+ 29 - 15
src/video/SDL_blit_A.c

@@ -1073,22 +1073,36 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info)
     }
 
     while (height--) {
-        /* *INDENT-OFF* */ /* clang-format off */
-        DUFFS_LOOP4(
-        {
-        DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
-        if (sA) {
-            DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
-            ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
-            ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+        if (srcbpp == 4 && dstbpp == 4 && dstfmt->Ashift == 24 && dstfmt->Rshift == 16 && dstfmt->Gshift == 8 &&
+            dstfmt->Bshift == 0) {
+            /* Fast path: 4-byte source pixels blended into a 4-byte destination whose
+               channel shifts are A=24, R=16, G=8, B=0 */
+            /* *INDENT-OFF* */ /* clang-format off */
+            DUFFS_LOOP4(
+            {
+            Uint32 blended = *(Uint32 *) dst;
+            PIXEL_TO_ARGB_PIXEL(*(Uint32 *) src, srcfmt, Pixel);
+            ALPHA_BLEND_ARGB_PIXELS(Pixel, blended);
+            *(Uint32 *) dst = blended;
+            src += srcbpp;
+            dst += dstbpp;
+            },
+            width);
+            /* *INDENT-ON* */ /* clang-format on */
+            /* Advance by the per-row skip exactly as the generic path does; without
+               this, every row after the first is addressed at the wrong offset
+               whenever srcskip or dstskip is non-zero */
+            src += srcskip;
+            dst += dstskip;
+        } else {
+            /* Generic path: unpack, blend and repack channel by channel */
+            /* *INDENT-OFF* */ /* clang-format off */
+            DUFFS_LOOP4(
+            {
+            DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA);
+            if (sA) {
+                DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA);
+                ALPHA_BLEND_RGBA(sR, sG, sB, sA, dR, dG, dB, dA);
+                ASSEMBLE_RGBA(dst, dstbpp, dstfmt, dR, dG, dB, dA);
+            }
+            src += srcbpp;
+            dst += dstbpp;
+            },
+            width);
+            /* *INDENT-ON* */ /* clang-format on */
+            src += srcskip;
+            dst += dstskip;
         }
-        src += srcbpp;
-        dst += dstbpp;
-        },
-        width);
-        /* *INDENT-ON* */ /* clang-format on */
-        src += srcskip;
-        dst += dstskip;
     }
     if (freeFormat) {
         SDL_DestroyPixelFormat(dstfmt);