vor 5 Jahren · 1187b013a5
--- a/src/video/SDL_blit_A.c
+++ b/src/video/SDL_blit_A.c
@@ -422,6 +422,21 @@ BlitRGBtoRGBPixelAlphaARMSIMD(SDL_BlitInfo * info)
 
				 #endif
			
 
				 
			
 
				 #if SDL_ARM_NEON_BLITTERS
			
 
				+void BlitARGBto565PixelAlphaARMNEONAsm(int32_t w, int32_t h, uint16_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
			
 
				+
			
 
				+static void
			
 
				+BlitARGBto565PixelAlphaARMNEON(SDL_BlitInfo * info)
			
 
				+{
			
 
				+    int32_t width = info->dst_w;
			
 
				+    int32_t height = info->dst_h;
			
 
				+    uint16_t *dstp = (uint16_t *)info->dst;
			
 
				+    int32_t dststride = width + (info->dst_skip >> 1);
			
 
				+    uint32_t *srcp = (uint32_t *)info->src;
			
 
				+    int32_t srcstride = width + (info->src_skip >> 2);
			
 
				+
			
 
				+    BlitARGBto565PixelAlphaARMNEONAsm(width, height, dstp, dststride, srcp, srcstride);
			
 
				+}
			
 
				+
			
 
				 void BlitRGBtoRGBPixelAlphaARMNEONAsm(int32_t w, int32_t h, uint32_t *dst, int32_t dst_stride, uint32_t *src, int32_t src_stride);
			
 
				 
			
 
				 static void
			
@@ -1333,14 +1348,21 @@ SDL_CalculateBlitA(SDL_Surface * surface)
 
				             }
			
 
				 
			
 
				         case 2:
			
 
				-#if SDL_ARM_SIMD_BLITTERS
			
 
				+#if SDL_ARM_NEON_BLITTERS || SDL_ARM_SIMD_BLITTERS
			
 
				                 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
			
 
				                     && sf->Gmask == 0xff00 && df->Gmask == 0x7e0
			
 
				                     && ((sf->Rmask == 0xff && df->Rmask == 0x1f)
			
 
				-                    || (sf->Bmask == 0xff && df->Bmask == 0x1f))
			
 
				-                    && SDL_HasARMSIMD())
			
 
				+                    || (sf->Bmask == 0xff && df->Bmask == 0x1f)))
			
 
				+                {
			
 
				+#if SDL_ARM_NEON_BLITTERS
			
 
				+                    if (SDL_HasNEON())
			
 
				+                        return BlitARGBto565PixelAlphaARMNEON;
			
 
				+#endif
			
 
				+#if SDL_ARM_SIMD_BLITTERS
			
 
				+                    if (SDL_HasARMSIMD())
			
 
				                         return BlitARGBto565PixelAlphaARMSIMD;
			
 
				-                else
			
 
				+#endif
			
 
				+                }
			
 
				 #endif
			
 
				                 if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
			
 
				                     && sf->Gmask == 0xff00
			
--- a/src/video/arm/pixman-arm-neon-asm.S
+++ b/src/video/arm/pixman-arm-neon-asm.S
@@ -157,3 +157,91 @@ generate_composite_function \
 
				     RGBtoRGBPixelAlpha_process_pixblock_head, \
			
 
				     RGBtoRGBPixelAlpha_process_pixblock_tail, \
			
 
				     RGBtoRGBPixelAlpha_process_pixblock_tail_head
			
 
				+
			
 
				+ /******************************************************************************/
			
 
				+
			
 
				+.macro ARGBto565PixelAlpha_process_pixblock_head
			
 
				+    vmvn        d6, d3
			
 
				+    vshr.u8     d1, #2
			
 
				+    vshr.u8     d3, #3
			
 
				+    vshr.u8     d0, #3
			
 
				+    vshrn.u16   d7, q2, #3
			
 
				+    vshrn.u16   d25, q2, #8
			
 
				+    vbic.i16    q2, #0xe0
			
 
				+    vshr.u8     d6, #3
			
 
				+    vshr.u8     d7, #2
			
 
				+    vshr.u8     d2, #3
			
 
				+    vmovn.u16   d24, q2
			
 
				+    vshr.u8     d25, #3
			
 
				+    vmull.u8    q13, d1, d3
			
 
				+    vmlal.u8    q13, d7, d6
			
 
				+    vmull.u8    q14, d0, d3
			
 
				+    vmlal.u8    q14, d24, d6
			
 
				+    vmull.u8    q15, d2, d3
			
 
				+    vmlal.u8    q15, d25, d6
			
 
				+.endm
			
 
				+
			
 
				+.macro ARGBto565PixelAlpha_process_pixblock_tail
			
 
				+    vsra.u16    q13, #5
			
 
				+    vsra.u16    q14, #5
			
 
				+    vsra.u16    q15, #5
			
 
				+    vrshr.u16   q13, #5
			
 
				+    vrshr.u16   q14, #5
			
 
				+    vrshr.u16   q15, #5
			
 
				+    vsli.u16    q14, q13, #5
			
 
				+    vsli.u16    q14, q15, #11
			
 
				+.endm
			
 
				+
			
 
				+.macro ARGBto565PixelAlpha_process_pixblock_tail_head
			
 
				+    vld4.8      {d0-d3}, [SRC]!
			
 
				+                                    PF add PF_X, PF_X, #8
			
 
				+        vsra.u16    q13, #5
			
 
				+                                    PF tst PF_CTL, #0xF
			
 
				+        vsra.u16    q14, #5
			
 
				+                                    PF addne PF_X, PF_X, #8
			
 
				+        vsra.u16    q15, #5
			
 
				+                                    PF subne PF_CTL, PF_CTL, #1
			
 
				+        vrshr.u16   q13, #5
			
 
				+                                    PF cmp PF_X, ORIG_W
			
 
				+        vrshr.u16   q14, #5
			
 
				+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
			
 
				+        vrshr.u16   q15, #5
			
 
				+                                    PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
			
 
				+    vld1.8      {d4-d5}, [DST_R]!
			
 
				+                                    PF subge PF_X, PF_X, ORIG_W
			
 
				+        vsli.u16    q14, q13, #5
			
 
				+                                    PF subges PF_CTL, PF_CTL, #0x10
			
 
				+        vsli.u16    q14, q15, #11
			
 
				+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
			
 
				+        vst1.8      {q14}, [DST_W :128]!
			
 
				+    vmvn        d6, d3
			
 
				+    vshr.u8     d1, #2
			
 
				+    vshr.u8     d3, #3
			
 
				+    vshr.u8     d0, #3
			
 
				+    vshrn.u16   d7, q2, #3
			
 
				+    vshrn.u16   d25, q2, #8
			
 
				+    vbic.i16    q2, #0xe0
			
 
				+                                    PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
			
 
				+    vshr.u8     d6, #3
			
 
				+    vshr.u8     d7, #2
			
 
				+    vshr.u8     d2, #3
			
 
				+    vmovn.u16   d24, q2
			
 
				+    vshr.u8     d25, #3
			
 
				+    vmull.u8    q13, d1, d3
			
 
				+    vmlal.u8    q13, d7, d6
			
 
				+    vmull.u8    q14, d0, d3
			
 
				+    vmlal.u8    q14, d24, d6
			
 
				+    vmull.u8    q15, d2, d3
			
 
				+    vmlal.u8    q15, d25, d6
			
 
				+.endm
			
 
				+
			
 
				+generate_composite_function \
			
 
				+    BlitARGBto565PixelAlphaARMNEONAsm, 32, 0, 16, \
			
 
				+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
			
 
				+    8, /* number of pixels, processed in a single block */ \
			
 
				+    6, /* prefetch distance */ \
			
 
				+    default_init, \
			
 
				+    default_cleanup, \
			
 
				+    ARGBto565PixelAlpha_process_pixblock_head, \
			
 
				+    ARGBto565PixelAlpha_process_pixblock_tail, \
			
 
				+    ARGBto565PixelAlpha_process_pixblock_tail_head