Explorar el Código

Fixed bug 4290 - add fastpaths for format conversion in BlitNtoN

All following conversion are faster (no colorkey, no blending).
(ratio isn't very accurate)

ABGR8888 -> ARGB8888 :  faster x6   (2655837 -> 416607)
ABGR8888 -> BGR24 :  faster x7   (2470117 -> 325693)
ABGR8888 -> RGB24 :  faster x7   (2478107 -> 335445)
ABGR8888 -> RGB888 :  faster x9   (3178524 -> 333859)

ARGB8888 -> ABGR8888 :  faster x6   (2648366 -> 406977)
ARGB8888 -> BGR24 :  faster x7   (2474978 -> 327819)
ARGB8888 -> BGR888 :  faster x9   (3189072 -> 326710)
ARGB8888 -> RGB24 :  faster x7   (2473689 -> 324729)

BGR24 -> ABGR8888 :  faster x6   (2268763 -> 359946)
BGR24 -> ARGB8888 :  faster x6   (2306393 -> 359213)
BGR24 -> BGR888 :  faster x6   (2231141 -> 324195)
BGR24 -> RGB24 :  faster x4   (1557835 -> 322033)
BGR24 -> RGB888 :  faster x6   (2229854 -> 323849)

BGR888 -> ARGB8888 :  faster x8   (3215202 -> 363137)
BGR888 -> BGR24 :  faster x7   (2474775 -> 347916)
BGR888 -> RGB24 :  faster x7   (2532783 -> 327354)
BGR888 -> RGB888 :  faster x9   (3134634 -> 344987)

RGB24 -> ABGR8888 :  faster x6   (2229486 -> 358919)
RGB24 -> ARGB8888 :  faster x6   (2271587 -> 358521)
RGB24 -> BGR24 :  faster x4   (1530913 -> 321149)
RGB24 -> BGR888 :  faster x6   (2227284 -> 327453)
RGB24 -> RGB888 :  faster x6   (2227125 -> 329061)

RGB888 -> ABGR8888 :  faster x8   (3163292 -> 362445)
RGB888 -> BGR24 :  faster x7   (2469489 -> 327127)
RGB888 -> BGR888 :  faster x9   (3190526 -> 326022)
RGB888 -> RGB24 :  faster x7   (2479084 -> 324982)
Sylvain Becker hace 6 años
padre
commit
1128d57316
Se han modificado 1 ficheros con 176 adiciones y 1 borrados
  1. 176 1
      src/video/SDL_blit_N.c

+ 176 - 1
src/video/SDL_blit_N.c

@@ -2515,6 +2515,146 @@ BlitNto2101010(SDL_BlitInfo * info)
     }
 }
 
+/* Blit_3or4_to_3or4__same_rgb: 3 or 4 bpp, same RGB triplet */
+static void
+Blit_3or4_to_3or4__same_rgb(SDL_BlitInfo * info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    Uint8 *src = info->src;
+    int srcskip = info->src_skip;
+    Uint8 *dst = info->dst;
+    int dstskip = info->dst_skip;
+    SDL_PixelFormat *srcfmt = info->src_fmt;
+    int srcbpp = srcfmt->BytesPerPixel;
+    SDL_PixelFormat *dstfmt = info->dst_fmt;
+    int dstbpp = dstfmt->BytesPerPixel;
+
+    if (dstfmt->Amask) {
+        /* SET_ALPHA */
+        unsigned alpha = info->a;
+        int alphashift = alpha << 24;
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                Uint32  *dst32 = (Uint32*)dst;
+                unsigned s0 = src[0];
+                unsigned s1 = src[1];
+                unsigned s2 = src[2];
+                *dst32 = (s0) | (s1 << 8) | (s2 << 16) | alphashift;
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    } else {
+        /* NO_ALPHA */
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                Uint32  *dst32 = (Uint32*)dst;
+                unsigned s0 = src[0];
+                unsigned s1 = src[1];
+                unsigned s2 = src[2];
+                *dst32 = (s0) | (s1 << 8) | (s2 << 16);
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    }
+    return;
+}
+
+/* Blit_3or4_to_3or4__inversed_rgb: 3 or 4 bpp, inversed RGB triplet */
+static void
+Blit_3or4_to_3or4__inversed_rgb(SDL_BlitInfo * info)
+{
+    int width = info->dst_w;
+    int height = info->dst_h;
+    Uint8 *src = info->src;
+    int srcskip = info->src_skip;
+    Uint8 *dst = info->dst;
+    int dstskip = info->dst_skip;
+    SDL_PixelFormat *srcfmt = info->src_fmt;
+    int srcbpp = srcfmt->BytesPerPixel;
+    SDL_PixelFormat *dstfmt = info->dst_fmt;
+    int dstbpp = dstfmt->BytesPerPixel;
+
+    if (dstfmt->Amask) {
+
+        if (srcfmt->Amask) {
+            /* COPY_ALPHA */
+            /* Only to switch ABGR8888 <-> ARGB8888 */
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    Uint32  *dst32 = (Uint32*)dst;
+                    unsigned s0 = src[0];
+                    unsigned s1 = src[1];
+                    unsigned s2 = src[2];
+                    unsigned alphashift = src[3] << 24;
+                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
+                    dst += dstbpp;
+                    src += srcbpp;
+                }, width);
+                /* *INDENT-ON* */
+                src += srcskip;
+                dst += dstskip;
+            }
+        } else {
+            /* SET_ALPHA */
+            unsigned alpha = info->a;
+            int alphashift = alpha << 24;
+            while (height--) {
+                /* *INDENT-OFF* */
+                DUFFS_LOOP(
+                {
+                    Uint32  *dst32 = (Uint32*)dst;
+                    unsigned s0 = src[0];
+                    unsigned s1 = src[1];
+                    unsigned s2 = src[2];
+                    /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                    *dst32 = (s0 << 16) | (s1 << 8) | (s2) | alphashift;
+                    dst += dstbpp;
+                    src += srcbpp;
+                }, width);
+                /* *INDENT-ON* */
+                src += srcskip;
+                dst += dstskip;
+            }
+        }
+    } else {
+        /* NO_ALPHA */
+        while (height--) {
+            /* *INDENT-OFF* */
+            DUFFS_LOOP(
+            {
+                Uint32  *dst32 = (Uint32*)dst;
+                unsigned s0 = src[0];
+                unsigned s1 = src[1];
+                unsigned s2 = src[2];
+                /* inversed, compared to Blit_3or4_to_3or4__same_rgb */
+                *dst32 = (s0 << 16) | (s1 << 8) | (s2);
+                dst += dstbpp;
+                src += srcbpp;
+            }, width);
+            /* *INDENT-ON* */
+            src += srcskip;
+            dst += dstskip;
+        }
+    }
+    return;
+}
+
 /* Normal N to N optimized blitters */
 #define NO_ALPHA   1
 #define SET_ALPHA  2
@@ -2555,6 +2695,23 @@ static const struct blit_table normal_blit_2[] = {
 };
 
 static const struct blit_table normal_blit_3[] = {
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+    /* 3->4 with same rgb triplet */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+    /* 3->4 with inversed rgb triplet */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+    /* 3->3 to switch RGB 24 <-> BGR 24 */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA },
+#endif
     /* Default for 24-bit RGB source, never optimized */
     {0, 0, 0, 0, 0, 0, 0, 0, BlitNtoN, 0}
 };
@@ -2571,6 +2728,24 @@ static const struct blit_table normal_blit_4[] = {
     {0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0, 0x0000001F,
      2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
 #endif
+#if SDL_BYTEORDER == SDL_LIL_ENDIAN
+    /* 4->3 with same rgb triplet */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__same_rgb, NO_ALPHA | SET_ALPHA},
+    /* 4->3 with inversed rgb triplet */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 3, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 3, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA},
+#endif
+    /* 4->4 with inversed rgb triplet, and COPY_ALPHA to switch ABGR8888 <-> ARGB8888 */
+    {0x000000FF, 0x0000FF00, 0x00FF0000, 4, 0x00FF0000, 0x0000FF00, 0x000000FF,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
+    {0x00FF0000, 0x0000FF00, 0x000000FF, 4, 0x000000FF, 0x0000FF00, 0x00FF0000,
+     0, Blit_3or4_to_3or4__inversed_rgb, NO_ALPHA | SET_ALPHA | COPY_ALPHA},
+    /* RBG 888 and RGB 565 */
     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0, 0x0000001F,
      0, Blit_RGB888_RGB565, NO_ALPHA},
     {0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x00007C00, 0x000003E0, 0x0000001F,
@@ -2623,7 +2798,7 @@ SDL_CalculateBlitN(SDL_Surface * surface)
             }
         } else {
             /* Now the meat, choose the blitter we want */
-            int a_need = NO_ALPHA;
+            Uint32 a_need = NO_ALPHA;
             if (dstfmt->Amask)
                 a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
             table = normal_blit[srcfmt->BytesPerPixel - 1];