Browse Source

video: move standard c, sse and lsx implementations of yuv2rgb to its own source

Anonymous Maarten 1 year ago
parent
commit
dbdc65fc95

+ 9 - 1
VisualC-GDK/SDL/SDL.vcxproj

@@ -570,6 +570,12 @@
     <ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" />
     <ClInclude Include="..\..\src\video\windows\wmmsg.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_common.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_internal.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx_func.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_sse.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_sse_func.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_std.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_std_func.h" />
     <ClCompile Include="..\..\src\atomic\SDL_atomic.c" />
@@ -814,7 +820,9 @@
     <ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" />
     <ClCompile Include="..\..\src\video\windows\SDL_windowsvulkan.c" />
     <ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" />
-    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_sse.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_std.c" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="..\..\src\core\windows\version.rc" />

+ 11 - 1
VisualC-WinRT/SDL-UWP.vcxproj

@@ -189,6 +189,14 @@
     <ClInclude Include="..\src\video\winrt\SDL_winrtopengles.h" />
     <ClInclude Include="..\src\video\winrt\SDL_winrtvideo_cpp.h" />
     <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_common.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_internal.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_lsx.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_lsx_func.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_sse.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_sse_func.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_std.h" />
+    <ClInclude Include="..\src\video\yuv2rgb\yuv_rgb_std_func.h" />
   </ItemGroup>
   <ItemGroup>
     <ClCompile Include="..\src\atomic\SDL_atomic.c" />
@@ -612,7 +620,9 @@
       <PrecompiledHeaderOutputFile Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(IntDir)$(TargetName)_cpp.pch</PrecompiledHeaderOutputFile>
       <PrecompiledHeaderOutputFile Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(IntDir)$(TargetName)_cpp.pch</PrecompiledHeaderOutputFile>
     </ClCompile>
-    <ClCompile Include="..\src\video\yuv2rgb\yuv_rgb.c" />
+    <ClCompile Include="..\src\video\yuv2rgb\yuv_rgb_lsx.c" />
+    <ClCompile Include="..\src\video\yuv2rgb\yuv_rgb_sse.c" />
+    <ClCompile Include="..\src\video\yuv2rgb\yuv_rgb_std.c" />
   </ItemGroup>
   <PropertyGroup Label="Globals">
     <ProjectGuid>{89e9b32e-a86a-47c3-a948-d2b1622925ce}</ProjectGuid>

+ 9 - 1
VisualC/SDL/SDL.vcxproj

@@ -468,7 +468,13 @@
     <ClInclude Include="..\..\src\video\windows\SDL_windowswindow.h" />
     <ClInclude Include="..\..\src\video\windows\wmmsg.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_common.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_internal.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx_func.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_sse.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_sse_func.h" />
+    <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_std.h" />
     <ClInclude Include="..\..\src\video\yuv2rgb\yuv_rgb_std_func.h" />
     <ClCompile Include="..\..\src\atomic\SDL_atomic.c" />
     <ClCompile Include="..\..\src\atomic\SDL_spinlock.c" />
@@ -666,7 +672,9 @@
     <ClCompile Include="..\..\src\video\windows\SDL_windowsvideo.c" />
     <ClCompile Include="..\..\src\video\windows\SDL_windowsvulkan.c" />
     <ClCompile Include="..\..\src\video\windows\SDL_windowswindow.c" />
-    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_lsx.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_sse.c" />
+    <ClCompile Include="..\..\src\video\yuv2rgb\yuv_rgb_std.c" />
   </ItemGroup>
   <ItemGroup>
     <ResourceCompile Include="..\..\src\core\windows\version.rc" />

+ 11 - 389
src/video/yuv2rgb/yuv_rgb.h

@@ -1,16 +1,19 @@
+#ifndef YUV_RGB_H_
+#define YUV_RGB_H_
+
 // Copyright 2016 Adrien Descamps
 // Distributed under BSD 3-Clause License
 
 // Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
 
-// There are a few slightly different variations of the YCbCr color space with different parameters that 
+// There are a few slightly different variations of the YCbCr color space with different parameters that
 // change the conversion matrix.
 // The three most common YCbCr color space, defined by BT.601, BT.709 and JPEG standard are implemented here.
 // See the respective standards for details
 // The matrix values used are derived from http://www.equasys.de/colorconversion.html
 
 // YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a 2 factor
-// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This 
+// For conversion from yuv to rgb, no interpolation is done, and the same UV value are used for 4 rgb pixels. This
 // is suboptimal for image quality, but by far the fastest method.
 
 // For all methods, width and height should be even, if not, the last row/column of the result image won't be affected.
@@ -18,394 +21,13 @@
 
 /*#include <stdint.h>*/
 
-typedef enum
-{
-	YCBCR_JPEG,
-	YCBCR_601,
-	YCBCR_709
-} YCbCrType;
-
 // yuv to rgb, standard c implementation
-void yuv420_rgb565_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgb24_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgba_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_bgra_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_argb_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_abgr_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb565_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb24_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgba_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_bgra_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_argb_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_abgr_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb565_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb24_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgba_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_bgra_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_argb_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_abgr_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-// yuv to rgb, sse implementation
-// pointers must be 16 byte aligned, and strides must be divisable by 16
-void yuv420_rgb565_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgb24_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgba_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_bgra_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_argb_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_abgr_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb565_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb24_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgba_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_bgra_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_argb_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_abgr_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb565_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb24_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgba_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_bgra_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_argb_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_abgr_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-// yuv to rgb, sse implementation
-// pointers do not need to be 16 byte aligned
-void yuv420_rgb565_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgb24_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_rgba_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_bgra_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_argb_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv420_abgr_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb565_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgb24_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_rgba_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_bgra_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_argb_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuv422_abgr_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb565_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgb24_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_rgba_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_bgra_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_argb_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-void yuvnv12_abgr_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	uint8_t *rgb, uint32_t rgb_stride, 
-	YCbCrType yuv_type);
-
-
-// rgb to yuv, standard c implementation
-void rgb24_yuv420_std(
-	uint32_t width, uint32_t height, 
-	const uint8_t *rgb, uint32_t rgb_stride, 
-	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	YCbCrType yuv_type);
-
-// rgb to yuv, sse implementation
-// pointers must be 16 byte aligned, and strides must be divisible by 16
-void rgb24_yuv420_sse(
-	uint32_t width, uint32_t height, 
-	const uint8_t *rgb, uint32_t rgb_stride, 
-	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	YCbCrType yuv_type);
-
-// rgb to yuv, sse implementation
-// pointers do not need to be 16 byte aligned
-void rgb24_yuv420_sseu(
-	uint32_t width, uint32_t height, 
-	const uint8_t *rgb, uint32_t rgb_stride, 
-	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
-	YCbCrType yuv_type);
-
-
-//yuv420 to bgra, lsx implementation
-void yuv420_rgb24_lsx(
-	uint32_t width, uint32_t height,
-	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
-	uint8_t *rgb, uint32_t rgb_stride,
-	YCbCrType yuv_type);
-
-void yuv420_rgba_lsx(
-	uint32_t width, uint32_t height,
-	const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
-	uint8_t *rgb, uint32_t rgb_stride,
-	YCbCrType yuv_type);
+#include "yuv_rgb_std.h"
 
-void yuv420_bgra_lsx(
-	uint32_t width, uint32_t height,
-	const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
-	uint8_t *rgb, uint32_t rgb_stride,
-	YCbCrType yuv_type);
+// yuv to rgb, sse2 implementation
+#include "yuv_rgb_sse.h"
 
-void yuv420_argb_lsx(
-	uint32_t width, uint32_t height,
-	const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
-	uint8_t *rgb, uint32_t rgb_stride,
-	YCbCrType yuv_type);
+// yuv to rgb, lsx implementation
+#include "yuv_rgb_lsx.h"
 
-void yuv420_abgr_lsx(
-	uint32_t width, uint32_t height,
-	const uint8_t *y, const uint8_t *v, const uint8_t *u, uint32_t y_stride, uint32_t uv_stride,
-	uint8_t *rgb, uint32_t rgb_stride,
-	YCbCrType yuv_type);
+#endif /* YUV_RGB_H_ */

+ 13 - 0
src/video/yuv2rgb/yuv_rgb_common.h

@@ -0,0 +1,13 @@
+#ifndef YUV_RGB_COMMON_H_
+#define YUV_RGB_COMMON_H_
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+typedef enum // Identifies which YCbCr variant (and therefore which conversion matrix) a converter uses
+{
+    YCBCR_JPEG, // JPEG variant (ITU-T T.871)
+    YCBCR_601,  // ITU-R BT.601 variant
+    YCBCR_709   // ITU-R BT.709 variant
+} YCbCrType;
+
+#endif /* YUV_RGB_COMMON_H_ */

+ 75 - 0
src/video/yuv2rgb/yuv_rgb_internal.h

@@ -0,0 +1,75 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+#include "yuv_rgb.h"
+
+#define PRECISION 6
+#define PRECISION_FACTOR (1<<PRECISION)
+
+typedef struct // Fixed-point coefficients for one RGB -> YUV conversion (formula in the comment below)
+{
+	uint8_t y_shift; // constant offset added to the Y component
+	int16_t matrix[3][3]; // 3x3 coefficients, scaled by PRECISION_FACTOR; rows are Y, U, V and columns are R, G, B
+} RGB2YUVParam;
+// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
+// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
+// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
+
+typedef struct // Fixed-point coefficients for one YUV -> RGB conversion (formula in the comment below)
+{
+	uint8_t y_shift; // offset subtracted from Y before scaling
+	int16_t y_factor; // weight of (Y - y_shift) in R, G and B
+	int16_t v_r_factor; // weight of (V - 128) in R
+	int16_t u_g_factor; // weight of (U - 128) in G
+	int16_t v_g_factor; // weight of (V - 128) in G
+	int16_t u_b_factor; // weight of (U - 128) in B
+} YUV2RGBParam;
+// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
+// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
+// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 26451)
+#endif
+
+#define V(value) ((int16_t)(((value)*PRECISION_FACTOR)+0.5)) // positive real -> rounded fixed-point; argument and result are parenthesized so V(a+b) and expressions around V() expand safely
+
+// for ITU-T T.871, values can be found in section 7
+// for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
+// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
+// all values are rounded to the fourth decimal
+
+static const YUV2RGBParam YUV2RGB[3] = { // one row per YCbCrType value, in enum order (JPEG, 601, 709); presumably indexed by yuv_type -- confirm in the *_func.h users
+	// ITU-T T.871 (JPEG)
+	{/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
+	// ITU-R BT.601-7
+	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
+	// ITU-R BT.709-6
+	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
+};
+
+static const RGB2YUVParam RGB2YUV[3] = { // one row per YCbCrType value, in enum order (JPEG, 601, 709); presumably indexed by yuv_type -- confirm in the *_func.h users
+	// ITU-T T.871 (JPEG)
+	{/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
+	// ITU-R BT.601-7
+	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
+	// ITU-R BT.709-6
+	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+/* The various layouts of YUV data we support */
+#define YUV_FORMAT_420	1
+#define YUV_FORMAT_422	2
+#define YUV_FORMAT_NV12	3
+
+/* The various formats of RGB pixel that we support */
+#define RGB_FORMAT_RGB565	1
+#define RGB_FORMAT_RGB24	2
+#define RGB_FORMAT_RGBA		3
+#define RGB_FORMAT_BGRA		4
+#define RGB_FORMAT_ARGB		5
+#define RGB_FORMAT_ABGR		6

+ 48 - 0
src/video/yuv2rgb/yuv_rgb_lsx.c

@@ -0,0 +1,48 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+#include "SDL_internal.h"
+
+#if SDL_HAVE_YUV
+#include "yuv_rgb_lsx.h"
+
+#ifdef SDL_LSX_INTRINSICS
+
+// yuv_rgb_internal.h defines the YUV_FORMAT_* / RGB_FORMAT_* selectors used below (and the coefficient tables).
+#include "yuv_rgb_internal.h"
+
+// Each group sets the parameters read by yuv_rgb_lsx_func.h and includes it once;
+// presumably every inclusion emits the function named by LSX_FUNCTION_NAME.
+
+#define LSX_FUNCTION_NAME	yuv420_rgb24_lsx
+#define STD_FUNCTION_NAME	yuv420_rgb24_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_RGB24
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME	yuv420_rgba_lsx
+#define STD_FUNCTION_NAME	yuv420_rgba_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_RGBA
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME	yuv420_bgra_lsx
+#define STD_FUNCTION_NAME	yuv420_bgra_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_BGRA
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME	yuv420_argb_lsx
+#define STD_FUNCTION_NAME	yuv420_argb_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_ARGB
+#include "yuv_rgb_lsx_func.h"
+
+#define LSX_FUNCTION_NAME	yuv420_abgr_lsx
+#define STD_FUNCTION_NAME	yuv420_abgr_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_ABGR
+#include "yuv_rgb_lsx_func.h"
+
+#endif  // SDL_LSX_INTRINSICS
+
+#endif // SDL_HAVE_YUV

+ 42 - 0
src/video/yuv2rgb/yuv_rgb_lsx.h

@@ -0,0 +1,42 @@
+#ifndef YUV_RGB_LSX_H_
+#define YUV_RGB_LSX_H_
+
+#ifdef SDL_LSX_INTRINSICS
+
+#include "yuv_rgb_common.h"
+
+// yuv420 to rgb, lsx implementation
+// One converter per RGB output layout; yuv_type selects the YCbCr variant.
+void yuv420_rgb24_lsx(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+void yuv420_rgba_lsx(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+void yuv420_bgra_lsx(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+void yuv420_argb_lsx(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+void yuv420_abgr_lsx(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+#endif  //SDL_LSX_INTRINSICS
+
+#endif /* YUV_RGB_LSX_H_ */

+ 3 - 280
src/video/yuv2rgb/yuv_rgb.c → src/video/yuv2rgb/yuv_rgb_sse.c

@@ -3,250 +3,7 @@
 #include "SDL_internal.h"
 
 #if SDL_HAVE_YUV
-
-#include "yuv_rgb.h"
-
-#define PRECISION 6
-#define PRECISION_FACTOR (1<<PRECISION)
-
-typedef struct
-{
-	uint8_t y_shift;
-	int16_t matrix[3][3];
-} RGB2YUVParam;
-// |Y|   |y_shift|                        |matrix[0][0] matrix[0][1] matrix[0][2]|   |R|
-// |U| = |  128  | + 1/PRECISION_FACTOR * |matrix[1][0] matrix[1][1] matrix[1][2]| * |G|
-// |V|   |  128  |                        |matrix[2][0] matrix[2][1] matrix[2][2]|   |B|
-
-typedef struct
-{
-	uint8_t y_shift;
-	int16_t y_factor;
-	int16_t v_r_factor;
-	int16_t u_g_factor;
-	int16_t v_g_factor;
-	int16_t u_b_factor;
-} YUV2RGBParam;
-// |R|                        |y_factor      0       v_r_factor|   |Y-y_shift|
-// |G| = 1/PRECISION_FACTOR * |y_factor  u_g_factor  v_g_factor| * |  U-128  |
-// |B|                        |y_factor  u_b_factor      0     |   |  V-128  |
-
-#ifdef _MSC_VER
-#pragma warning(push)
-#pragma warning(disable : 26451)
-#endif
-
-#define V(value) (int16_t)((value*PRECISION_FACTOR)+0.5)
-
-// for ITU-T T.871, values can be found in section 7
-// for ITU-R BT.601-7 values are derived from equations in sections 2.5.1-2.5.3, assuming RGB is encoded using full range ([0-1]<->[0-255])
-// for ITU-R BT.709-6 values are derived from equations in sections 3.2-3.4, assuming RGB is encoded using full range ([0-1]<->[0-255])
-// all values are rounded to the fourth decimal
-
-static const YUV2RGBParam YUV2RGB[3] = {
-	// ITU-T T.871 (JPEG)
-	{/*.y_shift=*/ 0, /*.y_factor=*/ V(1.0), /*.v_r_factor=*/ V(1.402), /*.u_g_factor=*/ -V(0.3441), /*.v_g_factor=*/ -V(0.7141), /*.u_b_factor=*/ V(1.772)},
-	// ITU-R BT.601-7
-	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.596), /*.u_g_factor=*/ -V(0.3918), /*.v_g_factor=*/ -V(0.813), /*.u_b_factor=*/ V(2.0172)},
-	// ITU-R BT.709-6
-	{/*.y_shift=*/ 16, /*.y_factor=*/ V(1.1644), /*.v_r_factor=*/ V(1.7927), /*.u_g_factor=*/ -V(0.2132), /*.v_g_factor=*/ -V(0.5329), /*.u_b_factor=*/ V(2.1124)}
-};
-
-static const RGB2YUVParam RGB2YUV[3] = {
-	// ITU-T T.871 (JPEG)
-	{/*.y_shift=*/ 0, /*.matrix=*/ {{V(0.299), V(0.587), V(0.114)}, {-V(0.1687), -V(0.3313), V(0.5)}, {V(0.5), -V(0.4187), -V(0.0813)}}},
-	// ITU-R BT.601-7
-	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.2568), V(0.5041), V(0.0979)}, {-V(0.1482), -V(0.291), V(0.4392)}, {V(0.4392), -V(0.3678), -V(0.0714)}}},
-	// ITU-R BT.709-6
-	{/*.y_shift=*/ 16, /*.matrix=*/ {{V(0.1826), V(0.6142), V(0.062)}, {-V(0.1006), -V(0.3386), V(0.4392)}, {V(0.4392), -V(0.3989), -V(0.0403)}}}
-};
-
-#ifdef _MSC_VER
-#pragma warning(pop)
-#endif
-
-/* The various layouts of YUV data we support */
-#define YUV_FORMAT_420	1
-#define YUV_FORMAT_422	2
-#define YUV_FORMAT_NV12	3
-
-/* The various formats of RGB pixel that we support */
-#define RGB_FORMAT_RGB565	1
-#define RGB_FORMAT_RGB24	2
-#define RGB_FORMAT_RGBA		3
-#define RGB_FORMAT_BGRA		4
-#define RGB_FORMAT_ARGB		5
-#define RGB_FORMAT_ABGR		6
-
-// divide by PRECISION_FACTOR and clamp to [0:255] interval
-// input must be in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range
-static uint8_t clampU8(int32_t v)
-{
-	static const uint8_t lut[512] =
-	{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
-	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
-	47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
-	91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
-	126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
-	159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-	192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
-	225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
-	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
-	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
-	};
-	return lut[((v+128*PRECISION_FACTOR)>>PRECISION)&511];
-}
-
-
-#define STD_FUNCTION_NAME	yuv420_rgb565_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv420_rgb24_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv420_rgba_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv420_bgra_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv420_argb_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv420_abgr_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_rgb565_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_rgb24_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_rgba_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_bgra_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_argb_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuv422_abgr_std
-#define YUV_FORMAT			YUV_FORMAT_422
-#define RGB_FORMAT			RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_rgb565_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_RGB565
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_rgb24_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_RGB24
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_rgba_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_RGBA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_bgra_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_BGRA
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_argb_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_ARGB
-#include "yuv_rgb_std_func.h"
-
-#define STD_FUNCTION_NAME	yuvnv12_abgr_std
-#define YUV_FORMAT			YUV_FORMAT_NV12
-#define RGB_FORMAT			RGB_FORMAT_ABGR
-#include "yuv_rgb_std_func.h"
-
-void rgb24_yuv420_std(
-	uint32_t width, uint32_t height,
-	const uint8_t *RGB, uint32_t RGB_stride,
-	uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
-	YCbCrType yuv_type)
-{
-	const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
-
-	uint32_t x, y;
-	for(y=0; y<(height-1); y+=2)
-	{
-		const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
-			*rgb_ptr2=RGB+(y+1)*RGB_stride;
-
-		uint8_t *y_ptr1=Y+y*Y_stride,
-			*y_ptr2=Y+(y+1)*Y_stride,
-			*u_ptr=U+(y/2)*UV_stride,
-			*v_ptr=V+(y/2)*UV_stride;
-
-		for(x=0; x<(width-1); x+=2)
-		{
-			// compute yuv for the four pixels, u and v values are summed
-			int32_t y_tmp, u_tmp, v_tmp;
-
-			y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
-			u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
-			v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
-			y_ptr1[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-
-			y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
-			u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
-			v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
-			y_ptr1[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-
-			y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
-			u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
-			v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
-			y_ptr2[0]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-
-			y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
-			u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
-			v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
-			y_ptr2[1]=clampU8(y_tmp+((param->y_shift)<<PRECISION));
-
-			u_ptr[0] = clampU8(u_tmp/4+(128<<PRECISION));
-			v_ptr[0] = clampU8(v_tmp/4+(128<<PRECISION));
-
-			rgb_ptr1 += 6;
-			rgb_ptr2 += 6;
-			y_ptr1 += 2;
-			y_ptr2 += 2;
-			u_ptr += 1;
-			v_ptr += 1;
-		}
-	}
-}
+#include "yuv_rgb_internal.h"
 
 #ifdef SDL_SSE2_INTRINSICS
 
@@ -698,40 +455,6 @@ void SDL_TARGETING("sse2") rgb24_yuv420_sseu(uint32_t width, uint32_t height,
 }
 */
 
-#endif //SDL_SSE2_INTRINSICS
-
-#ifdef SDL_LSX_INTRINSICS
-
-#define LSX_FUNCTION_NAME	yuv420_rgb24_lsx
-#define STD_FUNCTION_NAME	yuv420_rgb24_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_RGB24
-#include "yuv_rgb_lsx_func.h"
-
-#define LSX_FUNCTION_NAME	yuv420_rgba_lsx
-#define STD_FUNCTION_NAME	yuv420_rgba_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_RGBA
-#include "yuv_rgb_lsx_func.h"
-
-#define LSX_FUNCTION_NAME	yuv420_bgra_lsx
-#define STD_FUNCTION_NAME	yuv420_bgra_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_BGRA
-#include "yuv_rgb_lsx_func.h"
-
-#define LSX_FUNCTION_NAME	yuv420_argb_lsx
-#define STD_FUNCTION_NAME	yuv420_argb_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_ARGB
-#include "yuv_rgb_lsx_func.h"
-
-#define LSX_FUNCTION_NAME	yuv420_abgr_lsx
-#define STD_FUNCTION_NAME	yuv420_abgr_std
-#define YUV_FORMAT			YUV_FORMAT_420
-#define RGB_FORMAT			RGB_FORMAT_ABGR
-#include "yuv_rgb_lsx_func.h"
-
-#endif  //SDL_LSX_INTRINSICS
+#endif // SDL_SSE2_INTRINSICS
 
-#endif /* SDL_HAVE_YUV */
+#endif // SDL_HAVE_YUV

+ 248 - 0
src/video/yuv2rgb/yuv_rgb_sse.h

@@ -0,0 +1,248 @@
+#ifdef SDL_SSE2_INTRINSICS
+
+#include "yuv_rgb_common.h"
+
+// yuv to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void yuv420_rgb565_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_rgb24_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_rgba_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_bgra_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_argb_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_abgr_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgb565_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgb24_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgba_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_bgra_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_argb_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_abgr_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgba_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_bgra_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_argb_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_abgr_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void yuv420_rgb565_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_rgb24_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_rgba_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_bgra_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_argb_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv420_abgr_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgb565_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgb24_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgba_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_bgra_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_argb_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_abgr_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgb565_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgb24_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgba_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_bgra_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_argb_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_abgr_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+
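+// rgb to yuv, standard c implementation (defined in yuv_rgb_std.c)
+// NOTE(review): declared inside the SDL_SSE2_INTRINSICS guard, so this header does not expose it on non-SSE2 builds - confirm that is intended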
+void rgb24_yuv420_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *rgb, uint32_t rgb_stride,
+        uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void rgb24_yuv420_sse(
+        uint32_t width, uint32_t height,
+        const uint8_t *rgb, uint32_t rgb_stride,
+        uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers do not need to be 16 byte aligned
+void rgb24_yuv420_sseu(
+        uint32_t width, uint32_t height,
+        const uint8_t *rgb, uint32_t rgb_stride,
+        uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        YCbCrType yuv_type);
+#endif

+ 179 - 0
src/video/yuv2rgb/yuv_rgb_std.c

@@ -0,0 +1,179 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+#include "SDL_internal.h"
+
+#if SDL_HAVE_YUV
+
+#include "yuv_rgb_internal.h"
+
+// Scale a fixed-point value down by PRECISION_FACTOR and saturate it to [0:255].
+// The value is biased by 128*PRECISION_FACTOR, shifted right by PRECISION and masked to
+// 9 bits to form an index into a 512-entry table: entries 0-127 hold 0, entries 128-383
+// hold 0..255 and entries 384-511 hold 255.
+// Input is expected in the [-128*PRECISION_FACTOR:384*PRECISION_FACTOR] range; anything
+// outside it wraps around because of the 9-bit mask.
+static uint8_t clampU8(int32_t v)
+{
+    static const uint8_t lut[512] =
+            {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+             0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,
+             47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,
+             91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,
+             126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,
+             159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+             192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,
+             225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+             255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+             255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+             255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+             255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+            };
+    // bias first so that in-range negative inputs become non-negative before the shift
+    const int32_t biased = v+128*PRECISION_FACTOR;
+    const int32_t slot = (biased>>PRECISION)&511;
+
+    return lut[slot];
+}
+
+
+// Each stanza below sets STD_FUNCTION_NAME, YUV_FORMAT and RGB_FORMAT and then includes
+// yuv_rgb_std_func.h again: one inclusion per (YUV layout, RGB pixel format) pair,
+// i.e. 3 layouts (420, 422, NV12) x 6 pixel formats = 18 inclusions.
+// NOTE(review): presumably the header expands to a function named STD_FUNCTION_NAME and
+// #undefs the three macros afterwards - not visible from here, confirm in yuv_rgb_std_func.h
+#define STD_FUNCTION_NAME	yuv420_rgb565_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv420_rgb24_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv420_rgba_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv420_bgra_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv420_argb_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv420_abgr_std
+#define YUV_FORMAT			YUV_FORMAT_420
+#define RGB_FORMAT			RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_rgb565_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_rgb24_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_rgba_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_bgra_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_argb_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuv422_abgr_std
+#define YUV_FORMAT			YUV_FORMAT_422
+#define RGB_FORMAT			RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_rgb565_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_RGB565
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_rgb24_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_RGB24
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_rgba_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_RGBA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_bgra_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_BGRA
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_argb_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_ARGB
+#include "yuv_rgb_std_func.h"
+
+#define STD_FUNCTION_NAME	yuvnv12_abgr_std
+#define YUV_FORMAT			YUV_FORMAT_NV12
+#define RGB_FORMAT			RGB_FORMAT_ABGR
+#include "yuv_rgb_std_func.h"
+
+// rgb to yuv, standard c implementation
+// Converts a packed image with 3 bytes per pixel into separate Y, U and V planes, working
+// on 2x2 pixel blocks: each block writes four Y samples plus one U and one V sample, the
+// chroma samples being the sum of the four pixels' contributions divided by 4.
+//   RGB, RGB_stride      source pixels and the byte distance between source rows
+//   Y, Y_stride          luma plane and the byte distance between its rows
+//   U, V, UV_stride      chroma planes (one row per two image rows) and their row distance
+//   yuv_type             index into RGB2YUV selecting the matrix; not range-checked here
+// A trailing odd row or column is left untouched, and images smaller than 2x2 are a no-op.
+void rgb24_yuv420_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *RGB, uint32_t RGB_stride,
+        uint8_t *Y, uint8_t *U, uint8_t *V, uint32_t Y_stride, uint32_t UV_stride,
+        YCbCrType yuv_type)
+{
+    const RGB2YUVParam *const param = &(RGB2YUV[yuv_type]);
+    // luma offset and chroma mid-point, pre-scaled to the fixed-point domain clampU8 expects
+    const int32_t y_offset = (param->y_shift)<<PRECISION;
+    const int32_t uv_offset = 128<<PRECISION;
+
+    uint32_t x, y;
+
+    // width and height are unsigned: (width-1) or (height-1) would wrap to UINT32_MAX for 0
+    // and drive the loops far past the end of every buffer, so stop before that can happen
+    if(width<2 || height<2)
+    {
+        return;
+    }
+
+    for(y=0; y<(height-1); y+=2)
+    {
+        const uint8_t *rgb_ptr1=RGB+y*RGB_stride,
+                *rgb_ptr2=RGB+(y+1)*RGB_stride;
+
+        uint8_t *y_ptr1=Y+y*Y_stride,
+                *y_ptr2=Y+(y+1)*Y_stride,
+                *u_ptr=U+(y/2)*UV_stride,
+                *v_ptr=V+(y/2)*UV_stride;
+
+        for(x=0; x<(width-1); x+=2)
+        {
+            // compute yuv for the four pixels, u and v values are summed
+            int32_t y_tmp, u_tmp, v_tmp;
+
+            // top-left pixel
+            y_tmp = param->matrix[0][0]*rgb_ptr1[0] + param->matrix[0][1]*rgb_ptr1[1] + param->matrix[0][2]*rgb_ptr1[2];
+            u_tmp = param->matrix[1][0]*rgb_ptr1[0] + param->matrix[1][1]*rgb_ptr1[1] + param->matrix[1][2]*rgb_ptr1[2];
+            v_tmp = param->matrix[2][0]*rgb_ptr1[0] + param->matrix[2][1]*rgb_ptr1[1] + param->matrix[2][2]*rgb_ptr1[2];
+            y_ptr1[0]=clampU8(y_tmp+y_offset);
+
+            // top-right pixel
+            y_tmp = param->matrix[0][0]*rgb_ptr1[3] + param->matrix[0][1]*rgb_ptr1[4] + param->matrix[0][2]*rgb_ptr1[5];
+            u_tmp += param->matrix[1][0]*rgb_ptr1[3] + param->matrix[1][1]*rgb_ptr1[4] + param->matrix[1][2]*rgb_ptr1[5];
+            v_tmp += param->matrix[2][0]*rgb_ptr1[3] + param->matrix[2][1]*rgb_ptr1[4] + param->matrix[2][2]*rgb_ptr1[5];
+            y_ptr1[1]=clampU8(y_tmp+y_offset);
+
+            // bottom-left pixel
+            y_tmp = param->matrix[0][0]*rgb_ptr2[0] + param->matrix[0][1]*rgb_ptr2[1] + param->matrix[0][2]*rgb_ptr2[2];
+            u_tmp += param->matrix[1][0]*rgb_ptr2[0] + param->matrix[1][1]*rgb_ptr2[1] + param->matrix[1][2]*rgb_ptr2[2];
+            v_tmp += param->matrix[2][0]*rgb_ptr2[0] + param->matrix[2][1]*rgb_ptr2[1] + param->matrix[2][2]*rgb_ptr2[2];
+            y_ptr2[0]=clampU8(y_tmp+y_offset);
+
+            // bottom-right pixel
+            y_tmp = param->matrix[0][0]*rgb_ptr2[3] + param->matrix[0][1]*rgb_ptr2[4] + param->matrix[0][2]*rgb_ptr2[5];
+            u_tmp += param->matrix[1][0]*rgb_ptr2[3] + param->matrix[1][1]*rgb_ptr2[4] + param->matrix[1][2]*rgb_ptr2[5];
+            v_tmp += param->matrix[2][0]*rgb_ptr2[3] + param->matrix[2][1]*rgb_ptr2[4] + param->matrix[2][2]*rgb_ptr2[5];
+            y_ptr2[1]=clampU8(y_tmp+y_offset);
+
+            // one chroma sample per 2x2 block: average of the four summed contributions
+            u_ptr[0] = clampU8(u_tmp/4+uv_offset);
+            v_ptr[0] = clampU8(v_tmp/4+uv_offset);
+
+            // advance by two pixels (6 source bytes, 2 luma samples, 1 chroma sample)
+            rgb_ptr1 += 6;
+            rgb_ptr2 += 6;
+            y_ptr1 += 2;
+            y_ptr2 += 2;
+            u_ptr += 1;
+            v_ptr += 1;
+        }
+    }
+}
+
+#endif /* SDL_HAVE_YUV */

+ 130 - 0
src/video/yuv2rgb/yuv_rgb_std.h

@@ -0,0 +1,130 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
+
+// There are a few slightly different variations of the YCbCr color space with different parameters that
+// change the conversion matrix.
+// The three most common YCbCr color spaces, defined by the BT.601, BT.709 and JPEG standards, are implemented here.
+// See the respective standards for details
+// The matrix values used are derived from http://www.equasys.de/colorconversion.html
+
+// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2.
+// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
+// is suboptimal for image quality, but by far the fastest method.
+
+// For all methods, width and height should be even, if not, the last row/column of the result image won't be affected.
+// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
+
+/*#include <stdint.h>*/
+
+#include "yuv_rgb_common.h"
+
+// yuv to rgb, standard c implementation. Each declaration below is named <source>_<output>_std; all share one parameter list.
+void yuv420_rgb565_std(
+        uint32_t width, uint32_t height, // file header: both should be even, otherwise the last row/column is not affected
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // const (read-only) sources; strides presumably in bytes -- TODO confirm
+        uint8_t *rgb, uint32_t rgb_stride, // non-const destination; stride presumably in bytes -- TODO confirm
+        YCbCrType yuv_type); // YCbCrType is not declared in this header; presumably provided by yuv_rgb_common.h
+// Same parameters; the name indicates rgb24 output -- confirm the byte layout in the implementation.
+void yuv420_rgb24_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates rgba output -- confirm the byte layout in the implementation.
+void yuv420_rgba_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates bgra output -- confirm the byte layout in the implementation.
+void yuv420_bgra_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates argb output -- confirm the byte layout in the implementation.
+void yuv420_argb_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates abgr output -- confirm the byte layout in the implementation.
+void yuv420_abgr_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuv422_rgb565_std( // yuv422-named variants; parameter list is identical to the yuv420_*_std declarations
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // NOTE(review): whether y/u/v are separate planes or point into one packed buffer is not visible here -- confirm
+        uint8_t *rgb, uint32_t rgb_stride, // non-const destination; stride presumably in bytes -- TODO confirm
+        YCbCrType yuv_type); // YCbCrType is not declared in this header; presumably provided by yuv_rgb_common.h
+// Same parameters; the name indicates rgb24 output -- confirm the byte layout in the implementation.
+void yuv422_rgb24_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates rgba output -- confirm the byte layout in the implementation.
+void yuv422_rgba_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates bgra output -- confirm the byte layout in the implementation.
+void yuv422_bgra_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates argb output -- confirm the byte layout in the implementation.
+void yuv422_argb_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates abgr output -- confirm the byte layout in the implementation.
+void yuv422_abgr_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+
+void yuvnv12_rgb565_std( // nv12-named variants; parameter list is identical to the yuv420_*_std declarations
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, // NOTE(review): for nv12, u and v presumably address one interleaved UV plane -- confirm against the caller
+        uint8_t *rgb, uint32_t rgb_stride, // non-const destination; stride presumably in bytes -- TODO confirm
+        YCbCrType yuv_type); // YCbCrType is not declared in this header; presumably provided by yuv_rgb_common.h
+// Same parameters; the name indicates rgb24 output -- confirm the byte layout in the implementation.
+void yuvnv12_rgb24_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates rgba output -- confirm the byte layout in the implementation.
+void yuvnv12_rgba_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates bgra output -- confirm the byte layout in the implementation.
+void yuvnv12_bgra_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates argb output -- confirm the byte layout in the implementation.
+void yuvnv12_argb_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);
+// Same parameters; the name indicates abgr output -- confirm the byte layout in the implementation.
+void yuvnv12_abgr_std(
+        uint32_t width, uint32_t height,
+        const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
+        uint8_t *rgb, uint32_t rgb_stride,
+        YCbCrType yuv_type);