Browse Source

SDL_blit_N.c: Move ppc64le swizzle outside of loop

An in-place swizzle mutation of the permute vector was erroneously placed
inside of a loop, so it was re-applied on every iteration. This caused
each consecutive 4-pixel vector to alternate between correct and
incorrect endianness.

The bug was introduced in 715e070d299fc547cce8c52915b75e6316af6bff.

Thanks to RobbieAB for reporting the bug.

Fixes https://github.com/libsdl-org/SDL/issues/3428

(cherry picked from commit 9142292f4ad6d9bc8a2bc7c874e2fde54befeb9b)
Jeremy Rand 1 year ago
parent
commit
5e74365e55
1 changed file with 15 additions and 12 deletions
  1. 15 12
      src/video/SDL_blit_N.c

+ 15 - 12
src/video/SDL_blit_N.c

@@ -624,6 +624,11 @@ static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
     ((unsigned int *)(char *)&vrgbmask)[0] = rgbmask;
     vrgbmask = vec_splat(vrgbmask, 0);
 
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+    /* reorder bytes for PowerPC little endian */
+    vpermute = reorder_ppc64le_vec(vpermute);
+#endif
+
     while (height--) {
 #define ONE_PIXEL_BLEND(condition, widthvar)                    \
     if (copy_alpha) {                                           \
@@ -673,10 +678,6 @@ static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
                 /* vsel is set for items that match the key */
                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
-#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
-                /* reorder bytes for PowerPC little endian */
-                vpermute = reorder_ppc64le_vec(vpermute);
-#endif
                 /* permute the src vec to the dest format */
                 vs = vec_perm(vs, valpha, vpermute);
                 /* load the destination vec */
@@ -724,6 +725,11 @@ static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
     SDL_assert(srcfmt->BytesPerPixel == 4);
     SDL_assert(dstfmt->BytesPerPixel == 4);
 
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+    /* reorder bytes for PowerPC little endian */
+    vpermute = reorder_ppc64le_vec(vpermute);
+#endif
+
     while (height--) {
         vector unsigned char valigner;
         vector unsigned int vbits;
@@ -755,10 +761,6 @@ static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
             src += 4;
             width -= 4;
             vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
-#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
-            /* reorder bytes for PowerPC little endian */
-            vpermute = reorder_ppc64le_vec(vpermute);
-#endif
             vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
             vec_st(vbits, 0, dst);                    /* store it back out. */
             dst += 4;
@@ -809,6 +811,11 @@ static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
     SDL_assert(srcfmt->BytesPerPixel == 4);
     SDL_assert(dstfmt->BytesPerPixel == 4);
 
+#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
+    /* reorder bytes for PowerPC little endian */
+    vpermute = reorder_ppc64le_vec(vpermute);
+#endif
+
     while (height--) {
         vector unsigned char valigner;
         vector unsigned int vbits;
@@ -848,10 +855,6 @@ static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
             src += 4;
             width -= 4;
             vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
-#if defined(__powerpc__) && (SDL_BYTEORDER == SDL_LIL_ENDIAN)
-            /* reorder bytes for PowerPC little endian */
-            vpermute = reorder_ppc64le_vec(vpermute);
-#endif
             vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
             vec_st(vbits, 0, dst);                    /* store it back out. */
             dst += 4;