Browse Source

Add SDL_BindGPUComputeSamplers (#10778)

---------

Co-authored-by: Caleb Cornett <caleb.cornett@outlook.com>
Evan Hemsley 7 months ago
parent
commit
2b8a349b26

+ 23 - 4
include/SDL3/SDL_gpu.h

@@ -1491,6 +1491,7 @@ typedef struct SDL_GPUComputePipelineCreateInfo
     const Uint8 *code;                      /**< A pointer to compute shader code. */
     const char *entrypoint;                 /**< A pointer to a null-terminated UTF-8 string specifying the entry point function name for the shader. */
     SDL_GPUShaderFormat format;             /**< The format of the compute shader code. */
+    Uint32 num_samplers;                    /**< The number of samplers defined in the shader. */
     Uint32 num_readonly_storage_textures;   /**< The number of readonly storage textures defined in the shader. */
     Uint32 num_readonly_storage_buffers;    /**< The number of readonly storage buffers defined in the shader. */
     Uint32 num_writeonly_storage_textures;  /**< The number of writeonly storage textures defined in the shader. */
@@ -1791,13 +1792,13 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * For SPIR-V shaders, use the following resource sets:
  *
- * - 0: Read-only storage textures, followed by read-only storage buffers
+ * - 0: Sampled textures, followed by read-only storage textures, followed by read-only storage buffers
  * - 1: Write-only storage textures, followed by write-only storage buffers
  * - 2: Uniform buffers
  *
  * For DXBC Shader Model 5_0 shaders, use the following register order:
  *
- * - t registers: Read-only storage textures, followed by read-only storage
+ * - t registers: Sampled textures, followed by read-only storage textures, followed by read-only storage
  *   buffers
  * - u registers: Write-only storage textures, followed by write-only storage
  *   buffers
@@ -1805,7 +1806,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * For DXIL shaders, use the following register order:
  *
- * - (t[n], space0): Read-only storage textures, followed by read-only storage
+ * - (t[n], space0): Sampled textures, followed by read-only storage textures, followed by read-only storage
  *   buffers
  * - (u[n], space1): Write-only storage textures, followed by write-only
  *   storage buffers
@@ -1815,7 +1816,7 @@ extern SDL_DECLSPEC SDL_GPUDriver SDLCALL SDL_GetGPUDriver(SDL_GPUDevice *device
  *
  * - [[buffer]]: Uniform buffers, followed by read-only storage buffers,
  *   followed by write-only storage buffers
- * - [[texture]]: Read-only storage textures, followed by write-only storage
+ * - [[texture]]: Sampled textures, followed by read-only storage textures, followed by write-only storage
  *   textures
  *
  * \param device a GPU Context.
@@ -2757,6 +2758,24 @@ extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputePipeline(
     SDL_GPUComputePass *compute_pass,
     SDL_GPUComputePipeline *compute_pipeline);
 
+/**
+ * Binds texture-sampler pairs for use on the compute shader.
+ *
+ * The textures must have been created with SDL_GPU_TEXTUREUSAGE_SAMPLER.
+ *
+ * \param compute_pass a compute pass handle.
+ * \param first_slot the compute sampler slot to begin binding from.
+ * \param texture_sampler_bindings an array of texture-sampler binding structs.
+ * \param num_bindings the number of texture-sampler bindings to bind from the array.
+ *
+ * \since This function is available since SDL 3.0.0
+ */
+extern SDL_DECLSPEC void SDLCALL SDL_BindGPUComputeSamplers(
+    SDL_GPUComputePass *compute_pass,
+    Uint32 first_slot,
+    const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
+    Uint32 num_bindings);
+
 /**
  * Binds storage textures as readonly for use on the compute pipeline.
  *

+ 1 - 0
src/dynapi/SDL_dynapi.sym

@@ -29,6 +29,7 @@ SDL3_0.0.0 {
     SDL_BindAudioStream;
     SDL_BindAudioStreams;
     SDL_BindGPUComputePipeline;
+    SDL_BindGPUComputeSamplers;
     SDL_BindGPUComputeStorageBuffers;
     SDL_BindGPUComputeStorageTextures;
     SDL_BindGPUFragmentSamplers;

+ 1 - 0
src/dynapi/SDL_dynapi_overrides.h

@@ -54,6 +54,7 @@
 #define SDL_BindAudioStream SDL_BindAudioStream_REAL
 #define SDL_BindAudioStreams SDL_BindAudioStreams_REAL
 #define SDL_BindGPUComputePipeline SDL_BindGPUComputePipeline_REAL
+#define SDL_BindGPUComputeSamplers SDL_BindGPUComputeSamplers_REAL
 #define SDL_BindGPUComputeStorageBuffers SDL_BindGPUComputeStorageBuffers_REAL
 #define SDL_BindGPUComputeStorageTextures SDL_BindGPUComputeStorageTextures_REAL
 #define SDL_BindGPUFragmentSamplers SDL_BindGPUFragmentSamplers_REAL

+ 1 - 0
src/dynapi/SDL_dynapi_procs.h

@@ -74,6 +74,7 @@ SDL_DYNAPI_PROC(SDL_GPURenderPass*,SDL_BeginGPURenderPass,(SDL_GPUCommandBuffer
 SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStream,(SDL_AudioDeviceID a, SDL_AudioStream *b),(a,b),return)
 SDL_DYNAPI_PROC(SDL_bool,SDL_BindAudioStreams,(SDL_AudioDeviceID a, SDL_AudioStream **b, int c),(a,b,c),return)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputePipeline,(SDL_GPUComputePass *a, SDL_GPUComputePipeline *b),(a,b),)
+SDL_DYNAPI_PROC(void,SDL_BindGPUComputeSamplers,(SDL_GPUComputePass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageBuffers,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUBuffer *const *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUComputeStorageTextures,(SDL_GPUComputePass *a, Uint32 b, SDL_GPUTexture *const *c, Uint32 d),(a,b,c,d),)
 SDL_DYNAPI_PROC(void,SDL_BindGPUFragmentSamplers,(SDL_GPURenderPass *a, Uint32 b, const SDL_GPUTextureSamplerBinding *c, Uint32 d),(a,b,c,d),)

+ 26 - 0
src/gpu/SDL_gpu.c

@@ -1782,6 +1782,32 @@ void SDL_BindGPUComputePipeline(
     commandBufferHeader->compute_pipeline_bound = true;
 }
 
+void SDL_BindGPUComputeSamplers(
+    SDL_GPUComputePass *compute_pass,
+    Uint32 first_slot,
+    const SDL_GPUTextureSamplerBinding *texture_sampler_bindings,
+    Uint32 num_bindings)
+{
+    if (compute_pass == NULL) {
+        SDL_InvalidParamError("compute_pass");
+        return;
+    }
+    if (texture_sampler_bindings == NULL && num_bindings > 0) {
+        SDL_InvalidParamError("texture_sampler_bindings");
+        return;
+    }
+
+    if (COMPUTEPASS_DEVICE->debug_mode) {
+        CHECK_COMPUTEPASS
+    }
+
+    COMPUTEPASS_DEVICE->BindComputeSamplers(
+        COMPUTEPASS_COMMAND_BUFFER,
+        first_slot,
+        texture_sampler_bindings,
+        num_bindings);
+}
+
 void SDL_BindGPUComputeStorageTextures(
     SDL_GPUComputePass *compute_pass,
     Uint32 first_slot,

+ 7 - 0
src/gpu/SDL_sysgpu.h

@@ -511,6 +511,12 @@ struct SDL_GPUDevice
         SDL_GPUCommandBuffer *commandBuffer,
         SDL_GPUComputePipeline *computePipeline);
 
+    void (*BindComputeSamplers)(
+        SDL_GPUCommandBuffer *commandBuffer,
+        Uint32 firstSlot,
+        const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+        Uint32 numBindings);
+
     void (*BindComputeStorageTextures)(
         SDL_GPUCommandBuffer *commandBuffer,
         Uint32 firstSlot,
@@ -740,6 +746,7 @@ struct SDL_GPUDevice
     ASSIGN_DRIVER_FUNC(EndRenderPass, name)                 \
     ASSIGN_DRIVER_FUNC(BeginComputePass, name)              \
     ASSIGN_DRIVER_FUNC(BindComputePipeline, name)           \
+    ASSIGN_DRIVER_FUNC(BindComputeSamplers, name)           \
     ASSIGN_DRIVER_FUNC(BindComputeStorageTextures, name)    \
     ASSIGN_DRIVER_FUNC(BindComputeStorageBuffers, name)     \
     ASSIGN_DRIVER_FUNC(PushComputeUniformData, name)        \

+ 337 - 141
src/gpu/d3d11/SDL_gpu_d3d11.c

@@ -414,13 +414,13 @@ typedef struct D3D11TextureContainer
     TextureCommonHeader header;
 
     D3D11Texture *activeTexture;
-    bool canBeCycled;
 
     Uint32 textureCapacity;
     Uint32 textureCount;
     D3D11Texture **textures;
 
     char *debugName;
+    bool canBeCycled;
 } D3D11TextureContainer;
 
 typedef struct D3D11TextureSubresource
@@ -523,6 +523,7 @@ typedef struct D3D11ComputePipeline
 {
     ID3D11ComputeShader *computeShader;
 
+    Uint32 numSamplers;
     Uint32 numReadonlyStorageTextures;
     Uint32 numWriteonlyStorageTextures;
     Uint32 numReadonlyStorageBuffers;
@@ -609,6 +610,11 @@ typedef struct D3D11UniformBuffer
     Uint32 currentBlockSize;
 } D3D11UniformBuffer;
 
+typedef struct D3D11Sampler
+{
+    ID3D11SamplerState *handle;
+} D3D11Sampler;
+
 typedef struct D3D11Renderer D3D11Renderer;
 
 typedef struct D3D11CommandBuffer
@@ -646,35 +652,40 @@ typedef struct D3D11CommandBuffer
     bool needVertexBufferBind;
 
     bool needVertexSamplerBind;
-    bool needVertexResourceBind;
+    bool needVertexStorageTextureBind;
+    bool needVertexStorageBufferBind;
     bool needVertexUniformBufferBind;
 
     bool needFragmentSamplerBind;
-    bool needFragmentResourceBind;
+    bool needFragmentStorageTextureBind;
+    bool needFragmentStorageBufferBind;
     bool needFragmentUniformBufferBind;
 
-    bool needComputeUAVBind;
-    bool needComputeSRVBind;
+    bool needComputeSamplerBind;
+    bool needComputeReadOnlyTextureBind;
+    bool needComputeReadOnlyBufferBind;
     bool needComputeUniformBufferBind;
 
     ID3D11Buffer *vertexBuffers[MAX_BUFFER_BINDINGS];
     Uint32 vertexBufferOffsets[MAX_BUFFER_BINDINGS];
     Uint32 vertexBufferCount;
 
-    ID3D11SamplerState *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
-    ID3D11ShaderResourceView *vertexShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
-                                                        MAX_STORAGE_BUFFERS_PER_STAGE +
-                                                        MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Texture *vertexSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *vertexSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *vertexStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *vertexStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
-    ID3D11SamplerState *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
-    ID3D11ShaderResourceView *fragmentShaderResourceViews[MAX_TEXTURE_SAMPLERS_PER_STAGE +
-                                                          MAX_STORAGE_BUFFERS_PER_STAGE +
-                                                          MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Texture *fragmentSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *fragmentSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
-    ID3D11ShaderResourceView *computeShaderResourceViews[MAX_STORAGE_TEXTURES_PER_STAGE +
-                                                         MAX_STORAGE_BUFFERS_PER_STAGE];
-    ID3D11UnorderedAccessView *computeUnorderedAccessViews[MAX_COMPUTE_WRITE_TEXTURES +
-                                                           MAX_COMPUTE_WRITE_BUFFERS];
+    D3D11Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D11Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
+    D3D11Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
+    D3D11TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
+    D3D11Buffer *computeWriteOnlyStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
 
     // Uniform buffers
     D3D11UniformBuffer *vertexUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
@@ -703,11 +714,6 @@ typedef struct D3D11CommandBuffer
     Uint32 usedUniformBufferCapacity;
 } D3D11CommandBuffer;
 
-typedef struct D3D11Sampler
-{
-    ID3D11SamplerState *handle;
-} D3D11Sampler;
-
 struct D3D11Renderer
 {
     ID3D11Device1 *device;
@@ -772,20 +778,19 @@ struct D3D11Renderer
     SDL_Mutex *acquireUniformBufferLock;
     SDL_Mutex *fenceLock;
     SDL_Mutex *windowLock;
-};
 
-// Null arrays for resetting shader resource slots
+    // Null arrays for resetting resource slots
+    ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
 
-ID3D11RenderTargetView *nullRTVs[MAX_COLOR_TARGET_BINDINGS];
-
-ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE +
+    ID3D11ShaderResourceView *nullSRVs[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 +
                                    MAX_STORAGE_TEXTURES_PER_STAGE +
                                    MAX_STORAGE_BUFFERS_PER_STAGE];
 
-ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    ID3D11SamplerState *nullSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE * 2];
 
-ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
+    ID3D11UnorderedAccessView *nullUAVs[MAX_COMPUTE_WRITE_TEXTURES +
                                     MAX_COMPUTE_WRITE_BUFFERS];
+};
 
 // Logging
 
@@ -1527,6 +1532,7 @@ static SDL_GPUComputePipeline *D3D11_CreateComputePipeline(
 
     pipeline = SDL_malloc(sizeof(D3D11ComputePipeline));
     pipeline->computeShader = shader;
+    pipeline->numSamplers = createinfo->num_samplers;
     pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
     pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
     pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
@@ -3235,21 +3241,31 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer(
     }
 
     commandBuffer->needVertexSamplerBind = true;
-    commandBuffer->needVertexResourceBind = true;
+    commandBuffer->needVertexStorageTextureBind = true;
+    commandBuffer->needVertexStorageBufferBind = true;
     commandBuffer->needVertexUniformBufferBind = true;
     commandBuffer->needFragmentSamplerBind = true;
-    commandBuffer->needFragmentResourceBind = true;
+    commandBuffer->needFragmentStorageTextureBind = true;
+    commandBuffer->needFragmentStorageBufferBind = true;
     commandBuffer->needFragmentUniformBufferBind = true;
-    commandBuffer->needComputeUAVBind = true;
-    commandBuffer->needComputeSRVBind = true;
     commandBuffer->needComputeUniformBufferBind = true;
 
     SDL_zeroa(commandBuffer->vertexSamplers);
-    SDL_zeroa(commandBuffer->vertexShaderResourceViews);
+    SDL_zeroa(commandBuffer->vertexSamplerTextures);
+    SDL_zeroa(commandBuffer->vertexStorageTextures);
+    SDL_zeroa(commandBuffer->vertexStorageBuffers);
+
     SDL_zeroa(commandBuffer->fragmentSamplers);
-    SDL_zeroa(commandBuffer->fragmentShaderResourceViews);
-    SDL_zeroa(commandBuffer->computeShaderResourceViews);
-    SDL_zeroa(commandBuffer->computeUnorderedAccessViews);
+    SDL_zeroa(commandBuffer->fragmentSamplerTextures);
+    SDL_zeroa(commandBuffer->fragmentStorageTextures);
+    SDL_zeroa(commandBuffer->fragmentStorageBuffers);
+
+    SDL_zeroa(commandBuffer->computeSamplers);
+    SDL_zeroa(commandBuffer->computeSamplerTextures);
+    SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
+    SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
+    SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);
+    SDL_zeroa(commandBuffer->computeWriteOnlyStorageBuffers);
 
     D3D11_INTERNAL_AcquireFence(commandBuffer);
     commandBuffer->autoReleaseFence = 1;
@@ -3489,11 +3505,6 @@ static void D3D11_BeginRenderPass(
     SDL_GPUViewport viewport;
     SDL_Rect scissorRect;
 
-    d3d11CommandBuffer->needVertexSamplerBind = true;
-    d3d11CommandBuffer->needVertexResourceBind = true;
-    d3d11CommandBuffer->needFragmentSamplerBind = true;
-    d3d11CommandBuffer->needFragmentResourceBind = true;
-
     // Clear the bound targets for the current command buffer
     for (Uint32 i = 0; i < MAX_COLOR_TARGET_BINDINGS; i += 1) {
         d3d11CommandBuffer->colorTargetResolveTexture[i] = NULL;
@@ -3717,8 +3728,14 @@ static void D3D11_BindGraphicsPipeline(
         }
     }
 
-    // Mark that uniform bindings are needed
+    // Mark that bindings are needed
+    d3d11CommandBuffer->needVertexSamplerBind = true;
+    d3d11CommandBuffer->needVertexStorageTextureBind = true;
+    d3d11CommandBuffer->needVertexStorageBufferBind = true;
     d3d11CommandBuffer->needVertexUniformBufferBind = true;
+    d3d11CommandBuffer->needFragmentSamplerBind = true;
+    d3d11CommandBuffer->needFragmentStorageTextureBind = true;
+    d3d11CommandBuffer->needFragmentStorageBufferBind = true;
     d3d11CommandBuffer->needFragmentUniformBufferBind = true;
 }
 
@@ -3776,14 +3793,13 @@ static void D3D11_BindVertexSamplers(
             textureContainer->activeTexture);
 
         d3d11CommandBuffer->vertexSamplers[firstSlot + i] =
-            ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
+            (D3D11Sampler *)textureSamplerBindings[i].sampler;
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i] =
-            textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->vertexSamplerTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
     d3d11CommandBuffer->needVertexSamplerBind = true;
-    d3d11CommandBuffer->needVertexResourceBind = true;
 }
 
 static void D3D11_BindVertexStorageTextures(
@@ -3801,11 +3817,11 @@ static void D3D11_BindVertexStorageTextures(
             d3d11CommandBuffer,
             textureContainer->activeTexture);
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount] = textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->vertexStorageTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
-    d3d11CommandBuffer->needVertexResourceBind = true;
+    d3d11CommandBuffer->needVertexStorageTextureBind = true;
 }
 
 static void D3D11_BindVertexStorageBuffers(
@@ -3825,12 +3841,11 @@ static void D3D11_BindVertexStorageBuffers(
             d3d11CommandBuffer,
             bufferContainer->activeBuffer);
 
-        d3d11CommandBuffer->vertexShaderResourceViews[firstSlot + i +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexSamplerCount +
-                                                      d3d11CommandBuffer->graphicsPipeline->vertexStorageTextureCount] = bufferContainer->activeBuffer->srv;
+        d3d11CommandBuffer->vertexStorageBuffers[firstSlot + i] =
+            bufferContainer->activeBuffer;
     }
 
-    d3d11CommandBuffer->needVertexResourceBind = true;
+    d3d11CommandBuffer->needVertexStorageBufferBind = true;
 }
 
 static void D3D11_BindFragmentSamplers(
@@ -3849,14 +3864,13 @@ static void D3D11_BindFragmentSamplers(
             textureContainer->activeTexture);
 
         d3d11CommandBuffer->fragmentSamplers[firstSlot + i] =
-            ((D3D11Sampler *)textureSamplerBindings[i].sampler)->handle;
+            (D3D11Sampler *)textureSamplerBindings[i].sampler;
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i] =
-            textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->fragmentSamplerTextures[firstSlot + i] =
+            (D3D11Texture *)textureContainer->activeTexture;
     }
 
     d3d11CommandBuffer->needFragmentSamplerBind = true;
-    d3d11CommandBuffer->needFragmentResourceBind = true;
 }
 
 static void D3D11_BindFragmentStorageTextures(
@@ -3874,11 +3888,11 @@ static void D3D11_BindFragmentStorageTextures(
             d3d11CommandBuffer,
             textureContainer->activeTexture);
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount] = textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->fragmentStorageTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
-    d3d11CommandBuffer->needFragmentResourceBind = true;
+    d3d11CommandBuffer->needFragmentStorageTextureBind = true;
 }
 
 static void D3D11_BindFragmentStorageBuffers(
@@ -3898,12 +3912,11 @@ static void D3D11_BindFragmentStorageBuffers(
             d3d11CommandBuffer,
             bufferContainer->activeBuffer);
 
-        d3d11CommandBuffer->fragmentShaderResourceViews[firstSlot + i +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentSamplerCount +
-                                                        d3d11CommandBuffer->graphicsPipeline->fragmentStorageTextureCount] = bufferContainer->activeBuffer->srv;
+        d3d11CommandBuffer->fragmentStorageBuffers[firstSlot + i] =
+            bufferContainer->activeBuffer;
     }
 
-    d3d11CommandBuffer->needFragmentResourceBind = true;
+    d3d11CommandBuffer->needFragmentStorageBufferBind = true;
 }
 
 static void D3D11_INTERNAL_BindGraphicsResources(
@@ -3911,18 +3924,8 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 {
     D3D11GraphicsPipeline *graphicsPipeline = commandBuffer->graphicsPipeline;
 
-    Uint32 vertexResourceCount =
-        graphicsPipeline->vertexSamplerCount +
-        graphicsPipeline->vertexStorageTextureCount +
-        graphicsPipeline->vertexStorageBufferCount;
-
-    Uint32 fragmentResourceCount =
-        graphicsPipeline->fragmentSamplerCount +
-        graphicsPipeline->fragmentStorageTextureCount +
-        graphicsPipeline->fragmentStorageBufferCount;
-
     ID3D11Buffer *nullBuf = NULL;
-    Uint32 offsetInConstants, blockSizeInConstants, i;
+    Uint32 offsetInConstants, blockSizeInConstants;
 
     if (commandBuffer->needVertexBufferBind) {
         ID3D11DeviceContext_IASetVertexBuffers(
@@ -3936,30 +3939,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 
     if (commandBuffer->needVertexSamplerBind) {
         if (graphicsPipeline->vertexSamplerCount > 0) {
+            ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+            ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexSamplerCount; i += 1) {
+                samplerStates[i] = commandBuffer->vertexSamplers[i]->handle;
+                srvs[i] = commandBuffer->vertexSamplerTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_VSSetSamplers(
                 commandBuffer->context,
                 0,
                 graphicsPipeline->vertexSamplerCount,
-                commandBuffer->vertexSamplers);
+                samplerStates);
+
+            ID3D11DeviceContext_VSSetShaderResources(
+                commandBuffer->context,
+                0,
+                graphicsPipeline->vertexSamplerCount,
+                srvs);
         }
 
         commandBuffer->needVertexSamplerBind = false;
     }
 
-    if (commandBuffer->needVertexResourceBind) {
-        if (vertexResourceCount > 0) {
+    if (commandBuffer->needVertexStorageTextureBind) {
+        if (graphicsPipeline->vertexStorageTextureCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexStorageTextureCount; i += 1) {
+                srvs[i] = commandBuffer->vertexStorageTextures[i]->shaderView;
+            }
+
+            ID3D11DeviceContext_VSSetShaderResources(
+                commandBuffer->context,
+                graphicsPipeline->vertexSamplerCount,
+                graphicsPipeline->vertexStorageTextureCount,
+                srvs);
+        }
+
+        commandBuffer->needVertexStorageTextureBind = false;
+    }
+
+    if (commandBuffer->needVertexStorageBufferBind) {
+        if (graphicsPipeline->vertexStorageBufferCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->vertexStorageBufferCount; i += 1) {
+                srvs[i] = commandBuffer->vertexStorageBuffers[i]->srv;
+            }
+
             ID3D11DeviceContext_VSSetShaderResources(
                 commandBuffer->context,
-                0,
-                vertexResourceCount,
-                commandBuffer->vertexShaderResourceViews);
+                graphicsPipeline->vertexSamplerCount + graphicsPipeline->vertexStorageTextureCount,
+                graphicsPipeline->vertexStorageBufferCount,
+                srvs);
         }
 
-        commandBuffer->needVertexResourceBind = false;
+        commandBuffer->needVertexStorageBufferBind = false;
     }
 
     if (commandBuffer->needVertexUniformBufferBind) {
-        for (i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
+        for (Uint32 i = 0; i < graphicsPipeline->vertexUniformBufferCount; i += 1) {
             /* stupid workaround for god awful D3D11 drivers
              * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation
              */
@@ -3986,30 +4027,68 @@ static void D3D11_INTERNAL_BindGraphicsResources(
 
     if (commandBuffer->needFragmentSamplerBind) {
         if (graphicsPipeline->fragmentSamplerCount > 0) {
+            ID3D11SamplerState *samplerStates[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+            ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentSamplerCount; i += 1) {
+                samplerStates[i] = commandBuffer->fragmentSamplers[i]->handle;
+                srvs[i] = commandBuffer->fragmentSamplerTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_PSSetSamplers(
                 commandBuffer->context,
                 0,
                 graphicsPipeline->fragmentSamplerCount,
-                commandBuffer->fragmentSamplers);
+                samplerStates);
+
+            ID3D11DeviceContext_PSSetShaderResources(
+                commandBuffer->context,
+                0,
+                graphicsPipeline->fragmentSamplerCount,
+                srvs);
         }
 
         commandBuffer->needFragmentSamplerBind = false;
     }
 
-    if (commandBuffer->needFragmentResourceBind) {
-        if (fragmentResourceCount > 0) {
+    if (commandBuffer->needFragmentStorageTextureBind) {
+        if (graphicsPipeline->fragmentStorageTextureCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageTextureCount; i += 1) {
+                srvs[i] = commandBuffer->fragmentStorageTextures[i]->shaderView;
+            }
+
             ID3D11DeviceContext_PSSetShaderResources(
                 commandBuffer->context,
-                0,
-                fragmentResourceCount,
-                commandBuffer->fragmentShaderResourceViews);
+                graphicsPipeline->fragmentSamplerCount,
+                graphicsPipeline->fragmentStorageTextureCount,
+                srvs);
+        }
+
+        commandBuffer->needFragmentStorageTextureBind = false;
+    }
+
+    if (commandBuffer->needFragmentStorageBufferBind) {
+        if (graphicsPipeline->fragmentStorageBufferCount > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_BUFFERS_PER_STAGE];
+
+            for (Uint32 i = 0; i < graphicsPipeline->fragmentStorageBufferCount; i += 1) {
+                srvs[i] = commandBuffer->fragmentStorageBuffers[i]->srv;
+            }
+
+            ID3D11DeviceContext_PSSetShaderResources(
+                commandBuffer->context,
+                graphicsPipeline->fragmentSamplerCount + graphicsPipeline->fragmentStorageTextureCount,
+                graphicsPipeline->fragmentStorageBufferCount,
+                srvs);
         }
 
-        commandBuffer->needFragmentResourceBind = false;
+        commandBuffer->needFragmentStorageBufferBind = false;
     }
 
     if (commandBuffer->needFragmentUniformBufferBind) {
-        for (i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
+        for (Uint32 i = 0; i < graphicsPipeline->fragmentUniformBufferCount; i += 1) {
             /* stupid workaround for god awful D3D11 drivers
              * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-pssetconstantbuffers1#calling-pssetconstantbuffers1-with-command-list-emulation
              */
@@ -4127,6 +4206,7 @@ static void D3D11_EndRenderPass(
     SDL_GPUCommandBuffer *commandBuffer)
 {
     D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
+    D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
     Uint32 i;
 
     // Set render target slots to NULL to avoid NULL set behavior
@@ -4134,7 +4214,7 @@ static void D3D11_EndRenderPass(
     ID3D11DeviceContext_OMSetRenderTargets(
         d3d11CommandBuffer->context,
         MAX_COLOR_TARGET_BINDINGS,
-        nullRTVs,
+        renderer->nullRTVs,
         NULL);
 
     // Resolve MSAA color render targets
@@ -4150,16 +4230,44 @@ static void D3D11_EndRenderPass(
         }
     }
 
+    ID3D11DeviceContext_VSSetSamplers(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE,
+        renderer->nullSamplers);
+
+    ID3D11DeviceContext_VSSetShaderResources(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
+        renderer->nullSRVs);
+
+    ID3D11DeviceContext_PSSetSamplers(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE,
+        renderer->nullSamplers);
+
+    ID3D11DeviceContext_PSSetShaderResources(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE * 2 + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
+        renderer->nullSRVs);
+
     // Reset bind state
     SDL_zeroa(d3d11CommandBuffer->vertexBuffers);
     SDL_zeroa(d3d11CommandBuffer->vertexBufferOffsets);
     d3d11CommandBuffer->vertexBufferCount = 0;
 
     SDL_zeroa(d3d11CommandBuffer->vertexSamplers);
-    SDL_zeroa(d3d11CommandBuffer->vertexShaderResourceViews);
+    SDL_zeroa(d3d11CommandBuffer->vertexSamplerTextures);
+    SDL_zeroa(d3d11CommandBuffer->vertexStorageTextures);
+    SDL_zeroa(d3d11CommandBuffer->vertexStorageBuffers);
 
     SDL_zeroa(d3d11CommandBuffer->fragmentSamplers);
-    SDL_zeroa(d3d11CommandBuffer->fragmentShaderResourceViews);
+    SDL_zeroa(d3d11CommandBuffer->fragmentSamplerTextures);
+    SDL_zeroa(d3d11CommandBuffer->fragmentStorageTextures);
+    SDL_zeroa(d3d11CommandBuffer->fragmentStorageBuffers);
 }
 
 static void D3D11_PushVertexUniformData(
@@ -4229,13 +4337,10 @@ static void D3D11_BeginComputePass(
     D3D11TextureSubresource *textureSubresource;
     D3D11BufferContainer *bufferContainer;
     D3D11Buffer *buffer;
-    Uint32 i;
+    ID3D11UnorderedAccessView *uavs[MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS];
 
-    for (i = 0; i < numStorageTextureBindings; i += 1) {
+    for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
         textureContainer = (D3D11TextureContainer *)storageTextureBindings[i].texture;
-        if (!(textureContainer->header.info.usage & SDL_GPU_TEXTUREUSAGE_COMPUTE_STORAGE_WRITE)) {
-            SDL_LogError(SDL_LOG_CATEGORY_GPU, "Attempted to bind read-only texture as compute write texture");
-        }
 
         textureSubresource = D3D11_INTERNAL_PrepareTextureSubresourceForWrite(
             d3d11CommandBuffer->renderer,
@@ -4248,10 +4353,10 @@ static void D3D11_BeginComputePass(
             d3d11CommandBuffer,
             textureSubresource->parent);
 
-        d3d11CommandBuffer->computeUnorderedAccessViews[i] = textureSubresource->uav;
+        d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i] = textureSubresource;
     }
 
-    for (i = 0; i < numStorageBufferBindings; i += 1) {
+    for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
         bufferContainer = (D3D11BufferContainer *)storageBufferBindings[i].buffer;
 
         buffer = D3D11_INTERNAL_PrepareBufferForWrite(
@@ -4263,10 +4368,23 @@ static void D3D11_BeginComputePass(
             d3d11CommandBuffer,
             buffer);
 
-        d3d11CommandBuffer->computeUnorderedAccessViews[i + numStorageTextureBindings] = buffer->uav;
+        d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i] = buffer;
+    }
+
+    for (Uint32 i = 0; i < numStorageTextureBindings; i += 1) {
+        uavs[i] = d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uav;
     }
 
-    d3d11CommandBuffer->needComputeUAVBind = true;
+    for (Uint32 i = 0; i < numStorageBufferBindings; i += 1) {
+        uavs[numStorageTextureBindings + i] = d3d11CommandBuffer->computeWriteOnlyStorageBuffers[i]->uav;
+    }
+
+    ID3D11DeviceContext_CSSetUnorderedAccessViews(
+        d3d11CommandBuffer->context,
+        0,
+        numStorageTextureBindings + numStorageBufferBindings,
+        uavs,
+        NULL);
 }
 
 static void D3D11_BindComputePipeline(
@@ -4292,9 +4410,37 @@ static void D3D11_BindComputePipeline(
         }
     }
 
+    d3d11CommandBuffer->needComputeSamplerBind = true;
+    d3d11CommandBuffer->needComputeReadOnlyTextureBind = true;
+    d3d11CommandBuffer->needComputeReadOnlyBufferBind = true;
     d3d11CommandBuffer->needComputeUniformBufferBind = true;
 }
 
+static void D3D11_BindComputeSamplers( // Records texture/sampler pairs on the command buffer for the compute stage.
+    SDL_GPUCommandBuffer *commandBuffer,
+    Uint32 firstSlot, // first index written in computeSamplers / computeSamplerTextures
+    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+    Uint32 numBindings) // number of entries read from textureSamplerBindings
+{
+    D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
+
+    for (Uint32 i = 0; i < numBindings; i += 1) {
+        D3D11TextureContainer *textureContainer = (D3D11TextureContainer *)textureSamplerBindings[i].texture;
+
+        D3D11_INTERNAL_TrackTexture( // presumably keeps the texture referenced while the command buffer is in flight -- confirm
+            d3d11CommandBuffer,
+            textureContainer->activeTexture);
+
+        d3d11CommandBuffer->computeSamplers[firstSlot + i] = // NOTE(review): firstSlot + i is not range-checked here; presumably validated upstream -- confirm
+            (D3D11Sampler *)textureSamplerBindings[i].sampler;
+
+        d3d11CommandBuffer->computeSamplerTextures[firstSlot + i] =
+            textureContainer->activeTexture;
+    }
+
+    d3d11CommandBuffer->needComputeSamplerBind = true; // consumed by D3D11_INTERNAL_BindComputeResources, which issues CSSetSamplers / CSSetShaderResources
+}
+
 static void D3D11_BindComputeStorageTextures(
     SDL_GPUCommandBuffer *commandBuffer,
     Uint32 firstSlot,
@@ -4310,11 +4456,11 @@ static void D3D11_BindComputeStorageTextures(
             d3d11CommandBuffer,
             textureContainer->activeTexture);
 
-        d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i] =
-            textureContainer->activeTexture->shaderView;
+        d3d11CommandBuffer->computeReadOnlyStorageTextures[firstSlot + i] =
+            textureContainer->activeTexture;
     }
 
-    d3d11CommandBuffer->needComputeSRVBind = true;
+    d3d11CommandBuffer->needComputeReadOnlyTextureBind = true;
 }
 
 static void D3D11_BindComputeStorageBuffers(
@@ -4334,11 +4480,11 @@ static void D3D11_BindComputeStorageBuffers(
             d3d11CommandBuffer,
             bufferContainer->activeBuffer);
 
-        d3d11CommandBuffer->computeShaderResourceViews[firstSlot + i +
-                                                       d3d11CommandBuffer->computePipeline->numReadonlyStorageTextures] = bufferContainer->activeBuffer->srv;
+        d3d11CommandBuffer->computeReadOnlyStorageBuffers[firstSlot + i] =
+            bufferContainer->activeBuffer;
     }
 
-    d3d11CommandBuffer->needComputeSRVBind = true;
+    d3d11CommandBuffer->needComputeReadOnlyBufferBind = true;
 }
 
 static void D3D11_PushComputeUniformData(
@@ -4360,40 +4506,73 @@ static void D3D11_INTERNAL_BindComputeResources(
 {
     D3D11ComputePipeline *computePipeline = commandBuffer->computePipeline;
 
-    Uint32 readOnlyResourceCount =
-        computePipeline->numReadonlyStorageTextures +
-        computePipeline->numReadonlyStorageBuffers;
+    ID3D11Buffer *nullBuf = NULL;
+    Uint32 offsetInConstants, blockSizeInConstants;
 
-    Uint32 writeOnlyResourceCount =
-        computePipeline->numWriteonlyStorageTextures +
-        computePipeline->numWriteonlyStorageBuffers;
+    if (commandBuffer->needComputeSamplerBind) {
+        if (computePipeline->numSamplers > 0) {
+            ID3D11SamplerState *samplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+            ID3D11ShaderResourceView *srvs[MAX_TEXTURE_SAMPLERS_PER_STAGE];
 
-    ID3D11Buffer *nullBuf = NULL;
-    Uint32 offsetInConstants, blockSizeInConstants, i;
+            for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
+                samplers[i] = commandBuffer->computeSamplers[i]->handle;
+                srvs[i] = commandBuffer->computeSamplerTextures[i]->shaderView;
+            }
 
-    if (commandBuffer->needComputeUAVBind) {
-        ID3D11DeviceContext_CSSetUnorderedAccessViews(
-            commandBuffer->context,
-            0,
-            writeOnlyResourceCount,
-            commandBuffer->computeUnorderedAccessViews,
-            NULL);
+            ID3D11DeviceContext_CSSetSamplers(
+                commandBuffer->context,
+                0,
+                computePipeline->numSamplers,
+                samplers);
+
+            ID3D11DeviceContext_CSSetShaderResources(
+                commandBuffer->context,
+                0,
+                computePipeline->numSamplers,
+                srvs);
+        }
 
-        commandBuffer->needComputeUAVBind = false;
+        commandBuffer->needComputeSamplerBind = false;
     }
 
-    if (commandBuffer->needComputeSRVBind) {
-        ID3D11DeviceContext_CSSetShaderResources(
-            commandBuffer->context,
-            0,
-            readOnlyResourceCount,
-            commandBuffer->computeShaderResourceViews);
+    if (commandBuffer->needComputeReadOnlyTextureBind) {
+        if (computePipeline->numReadonlyStorageTextures > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < computePipeline->numReadonlyStorageTextures; i += 1) {
+                srvs[i] = commandBuffer->computeReadOnlyStorageTextures[i]->shaderView;
+            }
 
-        commandBuffer->needComputeSRVBind = false;
+            ID3D11DeviceContext_CSSetShaderResources(
+                commandBuffer->context,
+                computePipeline->numSamplers,
+                computePipeline->numReadonlyStorageTextures,
+                srvs);
+        }
+
+        commandBuffer->needComputeReadOnlyTextureBind = false;
+    }
+
+    if (commandBuffer->needComputeReadOnlyBufferBind) {
+        if (computePipeline->numReadonlyStorageBuffers > 0) {
+            ID3D11ShaderResourceView *srvs[MAX_STORAGE_TEXTURES_PER_STAGE];
+
+            for (Uint32 i = 0; i < computePipeline->numReadonlyStorageBuffers; i += 1) {
+                srvs[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srv;
+            }
+
+            ID3D11DeviceContext_CSSetShaderResources(
+                commandBuffer->context,
+                computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures,
+                computePipeline->numReadonlyStorageBuffers,
+                srvs);
+        }
+
+        commandBuffer->needComputeReadOnlyBufferBind = false;
     }
 
     if (commandBuffer->needComputeUniformBufferBind) {
-        for (i = 0; i < computePipeline->numUniformBuffers; i += 1) {
+        for (Uint32 i = 0; i < computePipeline->numUniformBuffers; i += 1) {
             /* stupid workaround for god awful D3D11 drivers
              * see: https://learn.microsoft.com/en-us/windows/win32/api/d3d11_1/nf-d3d11_1-id3d11devicecontext1-vssetconstantbuffers1#calling-vssetconstantbuffers1-with-command-list-emulation
              */
@@ -4456,6 +4635,7 @@ static void D3D11_EndComputePass(
     SDL_GPUCommandBuffer *commandBuffer)
 {
     D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer;
+    D3D11Renderer *renderer = d3d11CommandBuffer->renderer;
 
     // reset UAV slots to avoid NULL set behavior
     // https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-cssetshaderresources
@@ -4463,14 +4643,30 @@ static void D3D11_EndComputePass(
         d3d11CommandBuffer->context,
         0,
         MAX_COMPUTE_WRITE_TEXTURES + MAX_COMPUTE_WRITE_BUFFERS,
-        nullUAVs,
+        renderer->nullUAVs,
         NULL);
 
+    ID3D11DeviceContext_CSSetSamplers(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE,
+        renderer->nullSamplers);
+
+    ID3D11DeviceContext_CSSetShaderResources(
+        d3d11CommandBuffer->context,
+        0,
+        MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE + MAX_STORAGE_BUFFERS_PER_STAGE,
+        renderer->nullSRVs);
+
     d3d11CommandBuffer->computePipeline = NULL;
 
     // Reset bind state
-    SDL_zeroa(d3d11CommandBuffer->computeUnorderedAccessViews);
-    SDL_zeroa(d3d11CommandBuffer->computeShaderResourceViews);
+    SDL_zeroa(d3d11CommandBuffer->computeSamplers);
+    SDL_zeroa(d3d11CommandBuffer->computeSamplerTextures);
+    SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageTextures);
+    SDL_zeroa(d3d11CommandBuffer->computeReadOnlyStorageBuffers);
+    SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageTextureSubresources);
+    SDL_zeroa(d3d11CommandBuffer->computeWriteOnlyStorageBuffers);
 }
 
 // Fence Cleanup
@@ -6185,10 +6381,10 @@ tryCreateDevice:
 
     // Initialize null states
 
-    SDL_zeroa(nullRTVs);
-    SDL_zeroa(nullSRVs);
-    SDL_zeroa(nullSamplers);
-    SDL_zeroa(nullUAVs);
+    SDL_zeroa(renderer->nullRTVs);
+    SDL_zeroa(renderer->nullSRVs);
+    SDL_zeroa(renderer->nullSamplers);
+    SDL_zeroa(renderer->nullUAVs);
 
     // Initialize built-in pipelines
     D3D11_INTERNAL_InitBlitPipelines(renderer);

+ 132 - 20
src/gpu/d3d12/SDL_gpu_d3d12.c

@@ -694,6 +694,7 @@ struct D3D12CommandBuffer
     bool needFragmentStorageBufferBind;
     bool needFragmentUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
+    bool needComputeSamplerBind;
     bool needComputeReadOnlyStorageTextureBind;
     bool needComputeReadOnlyStorageBufferBind;
     bool needComputeUniformBufferBind[MAX_UNIFORM_BUFFERS_PER_STAGE];
@@ -714,6 +715,8 @@ struct D3D12CommandBuffer
     D3D12Buffer *fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
     D3D12UniformBuffer *fragmentUniformBuffers[MAX_UNIFORM_BUFFERS_PER_STAGE];
 
+    D3D12Texture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    D3D12Sampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     D3D12Texture *computeReadOnlyStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     D3D12Buffer *computeReadOnlyStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
     D3D12TextureSubresource *computeWriteOnlyStorageTextureSubresources[MAX_COMPUTE_WRITE_TEXTURES];
@@ -805,6 +808,8 @@ typedef struct D3D12ComputeRootSignature
 {
     ID3D12RootSignature *handle;
 
+    Sint32 samplerRootIndex;
+    Sint32 samplerTextureRootIndex;
     Sint32 readOnlyStorageTextureRootIndex;
     Sint32 readOnlyStorageBufferRootIndex;
     Sint32 writeOnlyStorageTextureRootIndex;
@@ -817,10 +822,11 @@ struct D3D12ComputePipeline
     ID3D12PipelineState *pipelineState;
     D3D12ComputeRootSignature *rootSignature;
 
-    Uint32 num_readonly_storage_textures;
-    Uint32 num_readonly_storage_buffers;
-    Uint32 num_writeonly_storage_textures;
-    Uint32 num_writeonly_storage_buffers;
+    Uint32 numSamplers;
+    Uint32 numReadOnlyStorageTextures;
+    Uint32 numReadOnlyStorageBuffers;
+    Uint32 numWriteOnlyStorageTextures;
+    Uint32 numWriteOnlyStorageBuffers;
     Uint32 numUniformBuffers;
 
     SDL_AtomicInt referenceCount;
@@ -2145,6 +2151,8 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
     SDL_zeroa(descriptorRanges);
     SDL_zero(rootParameter);
 
+    d3d12ComputeRootSignature->samplerRootIndex = -1;
+    d3d12ComputeRootSignature->samplerTextureRootIndex = -1;
     d3d12ComputeRootSignature->readOnlyStorageTextureRootIndex = -1;
     d3d12ComputeRootSignature->readOnlyStorageBufferRootIndex = -1;
     d3d12ComputeRootSignature->writeOnlyStorageTextureRootIndex = -1;
@@ -2154,10 +2162,44 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
         d3d12ComputeRootSignature->uniformBufferRootIndex[i] = -1;
     }
 
+    if (createInfo->num_samplers) {
+        descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER;
+        descriptorRange.NumDescriptors = createInfo->num_samplers;
+        descriptorRange.BaseShaderRegister = 0;
+        descriptorRange.RegisterSpace = 0;
+        descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+        descriptorRanges[rangeCount] = descriptorRange;
+
+        rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+        rootParameter.DescriptorTable.NumDescriptorRanges = 1;
+        rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
+        rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
+        rootParameters[parameterCount] = rootParameter;
+        d3d12ComputeRootSignature->samplerRootIndex = parameterCount;
+        rangeCount += 1;
+        parameterCount += 1;
+
+        descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
+        descriptorRange.NumDescriptors = createInfo->num_samplers;
+        descriptorRange.BaseShaderRegister = 0;
+        descriptorRange.RegisterSpace = 0;
+        descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
+        descriptorRanges[rangeCount] = descriptorRange;
+
+        rootParameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
+        rootParameter.DescriptorTable.NumDescriptorRanges = 1;
+        rootParameter.DescriptorTable.pDescriptorRanges = &descriptorRanges[rangeCount];
+        rootParameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; // ALL is used for compute
+        rootParameters[parameterCount] = rootParameter;
+        d3d12ComputeRootSignature->samplerTextureRootIndex = parameterCount;
+        rangeCount += 1;
+        parameterCount += 1;
+    }
+
     if (createInfo->num_readonly_storage_textures) {
         descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
         descriptorRange.NumDescriptors = createInfo->num_readonly_storage_textures;
-        descriptorRange.BaseShaderRegister = 0;
+        descriptorRange.BaseShaderRegister = createInfo->num_samplers;
         descriptorRange.RegisterSpace = 0;
         descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
         descriptorRanges[rangeCount] = descriptorRange;
@@ -2175,7 +2217,7 @@ static D3D12ComputeRootSignature *D3D12_INTERNAL_CreateComputeRootSignature(
     if (createInfo->num_readonly_storage_buffers) {
         descriptorRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
         descriptorRange.NumDescriptors = createInfo->num_readonly_storage_buffers;
-        descriptorRange.BaseShaderRegister = createInfo->num_readonly_storage_textures;
+        descriptorRange.BaseShaderRegister = createInfo->num_samplers + createInfo->num_readonly_storage_textures;
         descriptorRange.RegisterSpace = 0;
         descriptorRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND;
         descriptorRanges[rangeCount] = descriptorRange;
@@ -2346,10 +2388,11 @@ static SDL_GPUComputePipeline *D3D12_CreateComputePipeline(
 
     computePipeline->pipelineState = pipelineState;
     computePipeline->rootSignature = rootSignature;
-    computePipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures;
-    computePipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers;
-    computePipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures;
-    computePipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers;
+    computePipeline->numSamplers = createinfo->num_samplers;
+    computePipeline->numReadOnlyStorageTextures = createinfo->num_readonly_storage_textures;
+    computePipeline->numReadOnlyStorageBuffers = createinfo->num_readonly_storage_buffers;
+    computePipeline->numWriteOnlyStorageTextures = createinfo->num_writeonly_storage_textures;
+    computePipeline->numWriteOnlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
     computePipeline->numUniformBuffers = createinfo->num_uniform_buffers;
     SDL_AtomicSet(&computePipeline->referenceCount, 0);
 
@@ -4889,6 +4932,7 @@ static void D3D12_BindComputePipeline(
 
     d3d12CommandBuffer->currentComputePipeline = pipeline;
 
+    d3d12CommandBuffer->needComputeSamplerBind = true;
     d3d12CommandBuffer->needComputeReadOnlyStorageTextureBind = true;
     d3d12CommandBuffer->needComputeReadOnlyStorageBufferBind = true;
 
@@ -4906,8 +4950,8 @@ static void D3D12_BindComputePipeline(
     D3D12_INTERNAL_TrackComputePipeline(d3d12CommandBuffer, pipeline);
 
     // Bind write-only resources after setting root signature
-    if (pipeline->num_writeonly_storage_textures > 0) {
-        for (Uint32 i = 0; i < pipeline->num_writeonly_storage_textures; i += 1) {
+    if (pipeline->numWriteOnlyStorageTextures > 0) {
+        for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageTextures; i += 1) {
             cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageTextureSubresources[i]->uavHandle.cpuHandle;
         }
 
@@ -4924,8 +4968,8 @@ static void D3D12_BindComputePipeline(
             gpuDescriptorHandle);
     }
 
-    if (pipeline->num_writeonly_storage_buffers > 0) {
-        for (Uint32 i = 0; i < pipeline->num_writeonly_storage_buffers; i += 1) {
+    if (pipeline->numWriteOnlyStorageBuffers > 0) {
+        for (Uint32 i = 0; i < pipeline->numWriteOnlyStorageBuffers; i += 1) {
             cpuHandles[i] = d3d12CommandBuffer->computeWriteOnlyStorageBuffers[i]->uavDescriptor.cpuHandle;
         }
 
@@ -4943,6 +4987,32 @@ static void D3D12_BindComputePipeline(
     }
 }
 
+static void D3D12_BindComputeSamplers( // Records texture/sampler pairs on the command buffer for the compute stage.
+    SDL_GPUCommandBuffer *commandBuffer,
+    Uint32 firstSlot, // first index written in computeSamplerTextures / computeSamplers
+    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+    Uint32 numBindings) // number of entries read from textureSamplerBindings
+{
+    D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer;
+
+    for (Uint32 i = 0; i < numBindings; i += 1) {
+        D3D12TextureContainer *container = (D3D12TextureContainer *)textureSamplerBindings[i].texture;
+
+        D3D12_INTERNAL_TrackSampler( // presumably keeps the sampler referenced while the command buffer is in flight -- confirm
+            d3d12CommandBuffer,
+            (D3D12Sampler *)textureSamplerBindings[i].sampler);
+
+        D3D12_INTERNAL_TrackTexture( // same for the container's currently active texture
+            d3d12CommandBuffer,
+            container->activeTexture);
+
+        d3d12CommandBuffer->computeSamplerTextures[firstSlot + i] = container->activeTexture; // NOTE(review): firstSlot + i is not range-checked here; presumably validated upstream -- confirm
+        d3d12CommandBuffer->computeSamplers[firstSlot + i] = (D3D12Sampler *)textureSamplerBindings[i].sampler;
+    }
+
+    d3d12CommandBuffer->needComputeSamplerBind = true; // consumed by D3D12_INTERNAL_BindComputeResources, which writes the sampler and SRV descriptor tables
+}
+
 static void D3D12_BindComputeStorageTextures(
     SDL_GPUCommandBuffer *commandBuffer,
     Uint32 firstSlot,
@@ -5033,9 +5103,46 @@ static void D3D12_INTERNAL_BindComputeResources(
     D3D12_CPU_DESCRIPTOR_HANDLE cpuHandles[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     D3D12_GPU_DESCRIPTOR_HANDLE gpuDescriptorHandle;
 
+    if (commandBuffer->needComputeSamplerBind) {
+        if (computePipeline->numSamplers > 0) {
+            for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
+                cpuHandles[i] = commandBuffer->computeSamplers[i]->handle.cpuHandle;
+            }
+
+            D3D12_INTERNAL_WriteGPUDescriptors(
+                commandBuffer,
+                D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER,
+                cpuHandles,
+                computePipeline->numSamplers,
+                &gpuDescriptorHandle);
+
+            ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
+                commandBuffer->graphicsCommandList,
+                computePipeline->rootSignature->samplerRootIndex,
+                gpuDescriptorHandle);
+
+            for (Uint32 i = 0; i < computePipeline->numSamplers; i += 1) {
+                cpuHandles[i] = commandBuffer->computeSamplerTextures[i]->srvHandle.cpuHandle;
+            }
+
+            D3D12_INTERNAL_WriteGPUDescriptors(
+                commandBuffer,
+                D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
+                cpuHandles,
+                computePipeline->numSamplers,
+                &gpuDescriptorHandle);
+
+            ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
+                commandBuffer->graphicsCommandList,
+                computePipeline->rootSignature->samplerTextureRootIndex,
+                gpuDescriptorHandle);
+        }
+        commandBuffer->needComputeSamplerBind = false;
+    }
+
     if (commandBuffer->needComputeReadOnlyStorageTextureBind) {
-        if (computePipeline->num_readonly_storage_textures > 0) {
-            for (Uint32 i = 0; i < computePipeline->num_readonly_storage_textures; i += 1) {
+        if (computePipeline->numReadOnlyStorageTextures > 0) {
+            for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageTextures; i += 1) {
                 cpuHandles[i] = commandBuffer->computeReadOnlyStorageTextures[i]->srvHandle.cpuHandle;
             }
 
@@ -5043,7 +5150,7 @@ static void D3D12_INTERNAL_BindComputeResources(
                 commandBuffer,
                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                 cpuHandles,
-                computePipeline->num_readonly_storage_textures,
+                computePipeline->numReadOnlyStorageTextures,
                 &gpuDescriptorHandle);
 
             ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
@@ -5055,8 +5162,8 @@ static void D3D12_INTERNAL_BindComputeResources(
     }
 
     if (commandBuffer->needComputeReadOnlyStorageBufferBind) {
-        if (computePipeline->num_readonly_storage_buffers > 0) {
-            for (Uint32 i = 0; i < computePipeline->num_readonly_storage_buffers; i += 1) {
+        if (computePipeline->numReadOnlyStorageBuffers > 0) {
+            for (Uint32 i = 0; i < computePipeline->numReadOnlyStorageBuffers; i += 1) {
                 cpuHandles[i] = commandBuffer->computeReadOnlyStorageBuffers[i]->srvDescriptor.cpuHandle;
             }
 
@@ -5064,7 +5171,7 @@ static void D3D12_INTERNAL_BindComputeResources(
                 commandBuffer,
                 D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
                 cpuHandles,
-                computePipeline->num_readonly_storage_buffers,
+                computePipeline->numReadOnlyStorageBuffers,
                 &gpuDescriptorHandle);
 
             ID3D12GraphicsCommandList_SetComputeRootDescriptorTable(
@@ -5174,6 +5281,9 @@ static void D3D12_EndComputePass(
         }
     }
 
+    SDL_zeroa(d3d12CommandBuffer->computeSamplerTextures);
+    SDL_zeroa(d3d12CommandBuffer->computeSamplers);
+
     d3d12CommandBuffer->currentComputePipeline = NULL;
 }
 
@@ -6751,6 +6861,8 @@ static SDL_GPUCommandBuffer *D3D12_AcquireCommandBuffer(
     SDL_zeroa(commandBuffer->fragmentStorageBuffers);
     SDL_zeroa(commandBuffer->fragmentUniformBuffers);
 
+    SDL_zeroa(commandBuffer->computeSamplerTextures);
+    SDL_zeroa(commandBuffer->computeSamplers);
     SDL_zeroa(commandBuffer->computeReadOnlyStorageTextures);
     SDL_zeroa(commandBuffer->computeReadOnlyStorageBuffers);
     SDL_zeroa(commandBuffer->computeWriteOnlyStorageTextureSubresources);

+ 86 - 35
src/gpu/metal/SDL_gpu_metal.m

@@ -403,7 +403,7 @@ typedef struct MetalShader
     id<MTLLibrary> library;
     id<MTLFunction> function;
 
-    Uint32 num_samplers;
+    Uint32 numSamplers;
     Uint32 numUniformBuffers;
     Uint32 numStorageBuffers;
     Uint32 numStorageTextures;
@@ -434,14 +434,15 @@ typedef struct MetalGraphicsPipeline
 typedef struct MetalComputePipeline
 {
     id<MTLComputePipelineState> handle;
-    Uint32 num_readonly_storage_textures;
-    Uint32 num_writeonly_storage_textures;
-    Uint32 num_readonly_storage_buffers;
-    Uint32 num_writeonly_storage_buffers;
+    Uint32 numSamplers; // copied from createinfo->num_samplers; also the starting texture index for read-only storage textures
+    Uint32 numReadonlyStorageTextures; // copied from createinfo->num_readonly_storage_textures
+    Uint32 numWriteonlyStorageTextures; // copied from createinfo->num_writeonly_storage_textures
+    Uint32 numReadonlyStorageBuffers; // copied from createinfo->num_readonly_storage_buffers
+    Uint32 numWriteonlyStorageBuffers; // copied from createinfo->num_writeonly_storage_buffers
     Uint32 numUniformBuffers;
-    Uint32 threadcount_x;
-    Uint32 threadcount_y;
-    Uint32 threadcount_z;
+    Uint32 threadcountX; // X component of threadsPerThreadgroup passed to MTLSizeMake in METAL_DispatchCompute
+    Uint32 threadcountY; // Y component of threadsPerThreadgroup
+    Uint32 threadcountZ; // Z component of threadsPerThreadgroup
 } MetalComputePipeline;
 
 typedef struct MetalBuffer
@@ -511,6 +512,7 @@ typedef struct MetalCommandBuffer
     bool needFragmentStorageBufferBind;
     bool needFragmentUniformBind;
 
+    bool needComputeSamplerBind;
     bool needComputeTextureBind;
     bool needComputeBufferBind;
     bool needComputeUniformBind;
@@ -525,6 +527,8 @@ typedef struct MetalCommandBuffer
     id<MTLTexture> fragmentStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     id<MTLBuffer> fragmentStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
+    id<MTLTexture> computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    id<MTLSamplerState> computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     id<MTLTexture> computeReadOnlyTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     id<MTLBuffer> computeReadOnlyBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
     id<MTLTexture> computeWriteOnlyTextures[MAX_COMPUTE_WRITE_TEXTURES];
@@ -984,14 +988,15 @@ static SDL_GPUComputePipeline *METAL_CreateComputePipeline(
 
         pipeline = SDL_calloc(1, sizeof(MetalComputePipeline));
         pipeline->handle = handle;
-        pipeline->num_readonly_storage_textures = createinfo->num_readonly_storage_textures;
-        pipeline->num_writeonly_storage_textures = createinfo->num_writeonly_storage_textures;
-        pipeline->num_readonly_storage_buffers = createinfo->num_readonly_storage_buffers;
-        pipeline->num_writeonly_storage_buffers = createinfo->num_writeonly_storage_buffers;
+        pipeline->numSamplers = createinfo->num_samplers;
+        pipeline->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
+        pipeline->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
+        pipeline->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
+        pipeline->numWriteonlyStorageBuffers = createinfo->num_writeonly_storage_buffers;
         pipeline->numUniformBuffers = createinfo->num_uniform_buffers;
-        pipeline->threadcount_x = createinfo->threadcount_x;
-        pipeline->threadcount_y = createinfo->threadcount_y;
-        pipeline->threadcount_z = createinfo->threadcount_z;
+        pipeline->threadcountX = createinfo->threadcount_x;
+        pipeline->threadcountY = createinfo->threadcount_y;
+        pipeline->threadcountZ = createinfo->threadcount_z;
 
         return (SDL_GPUComputePipeline *)pipeline;
     }
@@ -1123,11 +1128,11 @@ static SDL_GPUGraphicsPipeline *METAL_CreateGraphicsPipeline(
         result->depth_stencil_state = depthStencilState;
         result->rasterizerState = createinfo->rasterizer_state;
         result->primitiveType = createinfo->primitive_type;
-        result->vertexSamplerCount = vertexShader->num_samplers;
+        result->vertexSamplerCount = vertexShader->numSamplers;
         result->vertexUniformBufferCount = vertexShader->numUniformBuffers;
         result->vertexStorageBufferCount = vertexShader->numStorageBuffers;
         result->vertexStorageTextureCount = vertexShader->numStorageTextures;
-        result->fragmentSamplerCount = fragmentShader->num_samplers;
+        result->fragmentSamplerCount = fragmentShader->numSamplers;
         result->fragmentUniformBufferCount = fragmentShader->numUniformBuffers;
         result->fragmentStorageBufferCount = fragmentShader->numStorageBuffers;
         result->fragmentStorageTextureCount = fragmentShader->numStorageTextures;
@@ -1309,7 +1314,7 @@ static SDL_GPUShader *METAL_CreateShader(
         result = SDL_calloc(1, sizeof(MetalShader));
         result->library = libraryFunction.library;
         result->function = libraryFunction.function;
-        result->num_samplers = createinfo->num_samplers;
+        result->numSamplers = createinfo->num_samplers;
         result->numStorageBuffers = createinfo->num_storage_buffers;
         result->numStorageTextures = createinfo->num_storage_textures;
         result->numUniformBuffers = createinfo->num_uniform_buffers;
@@ -2042,6 +2047,7 @@ static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer(
         commandBuffer->needFragmentStorageTextureBind = true;
         commandBuffer->needFragmentStorageBufferBind = true;
         commandBuffer->needFragmentUniformBind = true;
+        commandBuffer->needComputeSamplerBind = true;
         commandBuffer->needComputeBufferBind = true;
         commandBuffer->needComputeTextureBind = true;
         commandBuffer->needComputeUniformBind = true;
@@ -2627,41 +2633,54 @@ static void METAL_INTERNAL_BindComputeResources(
     MetalCommandBuffer *commandBuffer)
 {
     MetalComputePipeline *computePipeline = commandBuffer->compute_pipeline;
-    NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 }; // 8 is the max for both read and write-only
+    NSUInteger offsets[MAX_STORAGE_BUFFERS_PER_STAGE] = { 0 };
+
+    if (commandBuffer->needComputeSamplerBind) {
+        // Bind sampler textures
+        if (computePipeline->numSamplers > 0) {
+            [commandBuffer->computeEncoder setTextures:commandBuffer->computeSamplerTextures
+                                             withRange:NSMakeRange(0, computePipeline->numSamplers)];
+            [commandBuffer->computeEncoder setSamplerStates:commandBuffer->computeSamplers
+                                                  withRange:NSMakeRange(0, computePipeline->numSamplers)];
+        }
+        commandBuffer->needComputeSamplerBind = false;
+    }
 
     if (commandBuffer->needComputeTextureBind) {
         // Bind read-only textures
-        if (computePipeline->num_readonly_storage_textures > 0) {
+        if (computePipeline->numReadonlyStorageTextures > 0) {
             [commandBuffer->computeEncoder setTextures:commandBuffer->computeReadOnlyTextures
-                                             withRange:NSMakeRange(0, computePipeline->num_readonly_storage_textures)];
+                                             withRange:NSMakeRange(
+                                                           computePipeline->numSamplers,
+                                                           computePipeline->numReadonlyStorageTextures)];
         }
 
         // Bind write-only textures
-        if (computePipeline->num_writeonly_storage_textures > 0) {
+        if (computePipeline->numWriteonlyStorageTextures > 0) {
             [commandBuffer->computeEncoder setTextures:commandBuffer->computeWriteOnlyTextures
                                              withRange:NSMakeRange(
-                                                           computePipeline->num_readonly_storage_textures,
-                                                           computePipeline->num_writeonly_storage_textures)];
+                                                           computePipeline->numSamplers + computePipeline->numReadonlyStorageTextures,
+                                                           computePipeline->numWriteonlyStorageTextures)];
         }
         commandBuffer->needComputeTextureBind = false;
     }
 
     if (commandBuffer->needComputeBufferBind) {
         // Bind read-only buffers
-        if (computePipeline->num_readonly_storage_buffers > 0) {
+        if (computePipeline->numReadonlyStorageBuffers > 0) {
             [commandBuffer->computeEncoder setBuffers:commandBuffer->computeReadOnlyBuffers
                                               offsets:offsets
                                             withRange:NSMakeRange(computePipeline->numUniformBuffers,
-                                                                  computePipeline->num_readonly_storage_buffers)];
+                                                                  computePipeline->numReadonlyStorageBuffers)];
         }
         // Bind write-only buffers
-        if (computePipeline->num_writeonly_storage_buffers > 0) {
+        if (computePipeline->numWriteonlyStorageBuffers > 0) {
             [commandBuffer->computeEncoder setBuffers:commandBuffer->computeWriteOnlyBuffers
                                               offsets:offsets
                                             withRange:NSMakeRange(
                                                           computePipeline->numUniformBuffers +
-                                                              computePipeline->num_readonly_storage_buffers,
-                                                          computePipeline->num_writeonly_storage_buffers)];
+                                                              computePipeline->numReadonlyStorageBuffers,
+                                                          computePipeline->numWriteonlyStorageBuffers)];
         }
         commandBuffer->needComputeBufferBind = false;
     }
@@ -3020,6 +3039,32 @@ static void METAL_BindComputePipeline(
     }
 }
 
+// Stores `numBindings` texture/sampler pairs in the command buffer's compute
+// sampler slots, beginning at `firstSlot`, and flags the compute sampler
+// state as needing a rebind. Each bound texture is also passed to
+// METAL_INTERNAL_TrackTexture.
+static void METAL_BindComputeSamplers(
+    SDL_GPUCommandBuffer *commandBuffer,
+    Uint32 firstSlot,
+    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+    Uint32 numBindings)
+{
+    MetalCommandBuffer *cmdbuf = (MetalCommandBuffer *)commandBuffer;
+
+    for (Uint32 n = 0; n < numBindings; n += 1) {
+        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[n];
+        MetalTextureContainer *container = (MetalTextureContainer *)binding->texture;
+        MetalSampler *sampler = (MetalSampler *)binding->sampler;
+        const Uint32 slot = firstSlot + n;
+
+        METAL_INTERNAL_TrackTexture(cmdbuf, container->activeTexture);
+
+        // The sampler and its texture share the same slot index.
+        cmdbuf->computeSamplers[slot] = sampler->handle;
+        cmdbuf->computeSamplerTextures[slot] = container->activeTexture->handle;
+    }
+
+    cmdbuf->needComputeSamplerBind = true;
+}
+
 static void METAL_BindComputeStorageTextures(
     SDL_GPUCommandBuffer *commandBuffer,
     Uint32 firstSlot,
@@ -3092,9 +3137,9 @@ static void METAL_DispatchCompute(
         MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
         MTLSize threadgroups = MTLSizeMake(groupcountX, groupcountY, groupcountZ);
         MTLSize threadsPerThreadgroup = MTLSizeMake(
-            metalCommandBuffer->compute_pipeline->threadcount_x,
-            metalCommandBuffer->compute_pipeline->threadcount_y,
-            metalCommandBuffer->compute_pipeline->threadcount_z);
+            metalCommandBuffer->compute_pipeline->threadcountX,
+            metalCommandBuffer->compute_pipeline->threadcountY,
+            metalCommandBuffer->compute_pipeline->threadcountZ);
 
         METAL_INTERNAL_BindComputeResources(metalCommandBuffer);
 
@@ -3113,9 +3158,9 @@ static void METAL_DispatchComputeIndirect(
         MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer;
         MetalBuffer *metalBuffer = ((MetalBufferContainer *)buffer)->activeBuffer;
         MTLSize threadsPerThreadgroup = MTLSizeMake(
-            metalCommandBuffer->compute_pipeline->threadcount_x,
-            metalCommandBuffer->compute_pipeline->threadcount_y,
-            metalCommandBuffer->compute_pipeline->threadcount_z);
+            metalCommandBuffer->compute_pipeline->threadcountX,
+            metalCommandBuffer->compute_pipeline->threadcountY,
+            metalCommandBuffer->compute_pipeline->threadcountZ);
 
         METAL_INTERNAL_BindComputeResources(metalCommandBuffer);
 
@@ -3136,6 +3181,10 @@ static void METAL_EndComputePass(
         [metalCommandBuffer->computeEncoder endEncoding];
         metalCommandBuffer->computeEncoder = nil;
 
+        for (Uint32 i = 0; i < MAX_TEXTURE_SAMPLERS_PER_STAGE; i += 1) {
+            metalCommandBuffer->computeSamplers[i] = nil;
+            metalCommandBuffer->computeSamplerTextures[i] = nil;
+        }
         for (Uint32 i = 0; i < MAX_COMPUTE_WRITE_TEXTURES; i += 1) {
             metalCommandBuffer->computeWriteOnlyTextures[i] = nil;
         }
@@ -3223,6 +3272,8 @@ static void METAL_INTERNAL_CleanCommandBuffer(
         commandBuffer->vertexTextures[i] = nil;
         commandBuffer->fragmentSamplers[i] = nil;
         commandBuffer->fragmentTextures[i] = nil;
+        commandBuffer->computeSamplers[i] = nil;
+        commandBuffer->computeSamplerTextures[i] = nil;
     }
     for (i = 0; i < MAX_STORAGE_TEXTURES_PER_STAGE; i += 1) {
         commandBuffer->vertexStorageTextures[i] = nil;

+ 77 - 8
src/gpu/vulkan/SDL_gpu_vulkan.c

@@ -891,12 +891,13 @@ typedef struct VulkanComputePipelineResourceLayout
 
     /*
      * Descriptor set layout is as follows:
-     * 0: read-only textures, then read-only buffers
+     * 0: samplers, then read-only textures, then read-only buffers
      * 1: write-only textures, then write-only buffers
      * 2: uniform buffers
      */
     DescriptorSetPool descriptorSetPools[3];
 
+    Uint32 numSamplers;
     Uint32 numReadonlyStorageTextures;
     Uint32 numReadonlyStorageBuffers;
     Uint32 numWriteonlyStorageTextures;
@@ -1055,6 +1056,8 @@ typedef struct VulkanCommandBuffer
     Uint32 writeOnlyComputeStorageTextureSubresourceCount;
     VulkanBuffer *writeOnlyComputeStorageBuffers[MAX_COMPUTE_WRITE_BUFFERS];
 
+    VulkanTexture *computeSamplerTextures[MAX_TEXTURE_SAMPLERS_PER_STAGE];
+    VulkanSampler *computeSamplers[MAX_TEXTURE_SAMPLERS_PER_STAGE];
     VulkanTexture *readOnlyComputeStorageTextures[MAX_STORAGE_TEXTURES_PER_STAGE];
     VulkanBuffer *readOnlyComputeStorageBuffers[MAX_STORAGE_BUFFERS_PER_STAGE];
 
@@ -3870,6 +3873,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
     VkResult vulkanResult;
     Uint32 i;
 
+    pipelineResourceLayout->numSamplers = createinfo->num_samplers;
     pipelineResourceLayout->numReadonlyStorageTextures = createinfo->num_readonly_storage_textures;
     pipelineResourceLayout->numReadonlyStorageBuffers = createinfo->num_readonly_storage_buffers;
     pipelineResourceLayout->numWriteonlyStorageTextures = createinfo->num_writeonly_storage_textures;
@@ -3883,6 +3887,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
     descriptorSetLayoutCreateInfo.flags = 0;
     descriptorSetLayoutCreateInfo.pBindings = NULL;
     descriptorSetLayoutCreateInfo.bindingCount =
+        createinfo->num_samplers +
         createinfo->num_readonly_storage_textures +
         createinfo->num_readonly_storage_buffers;
 
@@ -3895,7 +3900,18 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
         descriptorSetPool->descriptorInfos = SDL_malloc(
             descriptorSetPool->descriptorInfoCount * sizeof(VulkanDescriptorInfo));
 
-        for (i = 0; i < createinfo->num_readonly_storage_textures; i += 1) {
+        for (i = 0; i < createinfo->num_samplers; i += 1) {
+            descriptorSetLayoutBindings[i].binding = i;
+            descriptorSetLayoutBindings[i].descriptorCount = 1;
+            descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            descriptorSetLayoutBindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
+            descriptorSetLayoutBindings[i].pImmutableSamplers = NULL;
+
+            descriptorSetPool->descriptorInfos[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT;
+        }
+
+        for (i = createinfo->num_samplers; i < createinfo->num_samplers + createinfo->num_readonly_storage_textures; i += 1) {
             descriptorSetLayoutBindings[i].binding = i;
             descriptorSetLayoutBindings[i].descriptorCount = 1;
             descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
@@ -3906,7 +3922,7 @@ static bool VULKAN_INTERNAL_InitializeComputePipelineResourceLayout(
             descriptorSetPool->descriptorInfos[i].stageFlag = VK_SHADER_STAGE_COMPUTE_BIT;
         }
 
-        for (i = createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) {
+        for (i = createinfo->num_samplers + createinfo->num_readonly_storage_textures; i < descriptorSetLayoutCreateInfo.bindingCount; i += 1) {
             descriptorSetLayoutBindings[i].binding = i;
             descriptorSetLayoutBindings[i].descriptorCount = 1;
             descriptorSetLayoutBindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@@ -8368,6 +8384,31 @@ static void VULKAN_BindComputePipeline(
     vulkanCommandBuffer->needNewComputeUniformOffsets = true;
 }
 
+// Stores `numBindings` texture/sampler pairs in the command buffer's compute
+// sampler slots, beginning at `firstSlot`, passes each sampler and texture to
+// the command buffer's tracking helpers, and flags the read-only compute
+// descriptor set as needing to be rebuilt.
+static void VULKAN_BindComputeSamplers(
+    SDL_GPUCommandBuffer *commandBuffer,
+    Uint32 firstSlot,
+    const SDL_GPUTextureSamplerBinding *textureSamplerBindings,
+    Uint32 numBindings)
+{
+    VulkanCommandBuffer *cmdbuf = (VulkanCommandBuffer *)commandBuffer;
+
+    for (Uint32 n = 0; n < numBindings; n += 1) {
+        const SDL_GPUTextureSamplerBinding *binding = &textureSamplerBindings[n];
+        VulkanTextureContainer *container = (VulkanTextureContainer *)binding->texture;
+        VulkanTexture *texture = container->activeTextureHandle->vulkanTexture;
+        VulkanSampler *sampler = (VulkanSampler *)binding->sampler;
+        const Uint32 slot = firstSlot + n;
+
+        // The texture and its sampler share the same slot index.
+        cmdbuf->computeSamplerTextures[slot] = texture;
+        cmdbuf->computeSamplers[slot] = sampler;
+
+        VULKAN_INTERNAL_TrackSampler(cmdbuf, sampler);
+        VULKAN_INTERNAL_TrackTexture(cmdbuf, texture);
+    }
+
+    cmdbuf->needNewComputeReadOnlyDescriptorSet = true;
+}
+
 static void VULKAN_BindComputeStorageTextures(
     SDL_GPUCommandBuffer *commandBuffer,
     Uint32 firstSlot,
@@ -8468,7 +8509,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
     VkWriteDescriptorSet *currentWriteDescriptorSet;
     DescriptorSetPool *descriptorSetPool;
     VkDescriptorBufferInfo bufferInfos[MAX_STORAGE_BUFFERS_PER_STAGE]; // 8 is max for both read and write
-    VkDescriptorImageInfo imageInfos[MAX_STORAGE_TEXTURES_PER_STAGE];  // 8 is max for both read and write
+    VkDescriptorImageInfo imageInfos[MAX_TEXTURE_SAMPLERS_PER_STAGE + MAX_STORAGE_TEXTURES_PER_STAGE];
     Uint32 dynamicOffsets[MAX_UNIFORM_BUFFERS_PER_STAGE];
     Uint32 bufferInfoCount = 0;
     Uint32 imageInfoCount = 0;
@@ -8486,9 +8527,31 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
 
         writeDescriptorSets = SDL_stack_alloc(
             VkWriteDescriptorSet,
-            resourceLayout->numReadonlyStorageTextures +
+            resourceLayout->numSamplers +
+                resourceLayout->numReadonlyStorageTextures +
                 resourceLayout->numReadonlyStorageBuffers);
 
+        for (i = 0; i < resourceLayout->numSamplers; i += 1) {
+            currentWriteDescriptorSet = &writeDescriptorSets[i];
+            currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+            currentWriteDescriptorSet->pNext = NULL;
+            currentWriteDescriptorSet->descriptorCount = 1;
+            currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
+            currentWriteDescriptorSet->dstArrayElement = 0;
+            currentWriteDescriptorSet->dstBinding = i;
+            currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
+            currentWriteDescriptorSet->pTexelBufferView = NULL;
+            currentWriteDescriptorSet->pBufferInfo = NULL;
+
+            imageInfos[imageInfoCount].sampler = commandBuffer->computeSamplers[i]->sampler;
+            imageInfos[imageInfoCount].imageView = commandBuffer->computeSamplerTextures[i]->fullView;
+            imageInfos[imageInfoCount].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
+
+            currentWriteDescriptorSet->pImageInfo = &imageInfos[imageInfoCount];
+
+            imageInfoCount += 1;
+        }
+
         for (i = 0; i < resourceLayout->numReadonlyStorageTextures; i += 1) {
             currentWriteDescriptorSet = &writeDescriptorSets[i];
             currentWriteDescriptorSet->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
@@ -8496,7 +8559,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
             currentWriteDescriptorSet->descriptorCount = 1;
             currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
             currentWriteDescriptorSet->dstArrayElement = 0;
-            currentWriteDescriptorSet->dstBinding = i;
+            currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + i;
             currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
             currentWriteDescriptorSet->pTexelBufferView = NULL;
             currentWriteDescriptorSet->pBufferInfo = NULL;
@@ -8518,7 +8581,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
             currentWriteDescriptorSet->descriptorCount = 1;
             currentWriteDescriptorSet->descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
             currentWriteDescriptorSet->dstArrayElement = 0;
-            currentWriteDescriptorSet->dstBinding = resourceLayout->numReadonlyStorageTextures + i;
+            currentWriteDescriptorSet->dstBinding = resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + i;
             currentWriteDescriptorSet->dstSet = commandBuffer->computeReadOnlyDescriptorSet;
             currentWriteDescriptorSet->pTexelBufferView = NULL;
             currentWriteDescriptorSet->pImageInfo = NULL;
@@ -8534,7 +8597,7 @@ static void VULKAN_INTERNAL_BindComputeDescriptorSets(
 
         renderer->vkUpdateDescriptorSets(
             renderer->logicalDevice,
-            resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers,
+            resourceLayout->numSamplers + resourceLayout->numReadonlyStorageTextures + resourceLayout->numReadonlyStorageBuffers,
             writeDescriptorSets,
             0,
             NULL);
@@ -8794,6 +8857,10 @@ static void VULKAN_EndComputePass(
         }
     }
 
+    // we don't need a barrier because sampler state is always the default if sampler bit is set
+    SDL_zeroa(vulkanCommandBuffer->computeSamplerTextures);
+    SDL_zeroa(vulkanCommandBuffer->computeSamplers);
+
     vulkanCommandBuffer->currentComputePipeline = NULL;
 
     vulkanCommandBuffer->computeReadOnlyDescriptorSet = VK_NULL_HANDLE;
@@ -9676,6 +9743,8 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer(
     SDL_zeroa(commandBuffer->writeOnlyComputeStorageTextureSubresources);
     commandBuffer->writeOnlyComputeStorageTextureSubresourceCount = 0;
     SDL_zeroa(commandBuffer->writeOnlyComputeStorageBuffers);
+    SDL_zeroa(commandBuffer->computeSamplerTextures);
+    SDL_zeroa(commandBuffer->computeSamplers);
     SDL_zeroa(commandBuffer->readOnlyComputeStorageTextures);
     SDL_zeroa(commandBuffer->readOnlyComputeStorageBuffers);