Browse Source

Add SDL_sem implementation using Atomics and WaitOnAddress API.
Keep Semaphore Kernel Object impl for Windows 7 and older - choose at runtime

v2: - Fix mixed int/LONG types
- Reorder definitions
- Add missing include

v3: - Use `GetModuleHandle()` to load the API Set

Joel Linn 4 years ago
parent
commit
d0b8295c0d
2 changed files with 338 additions and 22 deletions
  1. 16 0
      include/SDL_hints.h
  2. 322 22
      src/thread/windows/SDL_syssem.c

+ 16 - 0
include/SDL_hints.h

@@ -1210,6 +1210,22 @@ extern "C" {
  */
 #define SDL_HINT_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS "SDL_WINDOWS_FORCE_MUTEX_CRITICAL_SECTIONS"
 
+/**
+ * \brief Force SDL to use Kernel Semaphores on Windows.
+ *        Kernel Semaphores are inter-process and require a context
+ *        switch on every interaction. On Windows 8 and newer, the
+ *        WaitOnAddress API is available. Using that and atomics to
+ *        implement semaphores increases performance.
+ *        SDL will fall back to Kernel Objects on older OS versions
+ *        or if forced to by this hint.
+ *
+ *  This variable can be set to the following values:
+ *    "0"       - Use Atomics and WaitOnAddress API when available. If not, fall back to Kernel Objects. (default)
+ *    "1"       - Force the use of Kernel Objects in all cases.
+ *
+ */
+#define SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL "SDL_WINDOWS_FORCE_SEMAPHORE_KERNEL"
+
 /**
  * \brief Tell SDL which Dispmanx layer to use on a Raspberry PI
  *

+ 322 - 22
src/thread/windows/SDL_syssem.c

@@ -22,27 +22,239 @@
 
 #if SDL_THREAD_WINDOWS
 
-/* Semaphore functions using the Win32 API */
+/**
+ * Semaphore functions using the Win32 API
+ * There are two implementations available based on:
+ * - Kernel Semaphores. Available on all OS versions. (kern)
+ *   Heavy-weight inter-process kernel objects.
+ * - Atomics and WaitOnAddress API. (atom)
+ *   Faster due to significantly fewer context switches.
+ *   Requires Windows 8 or newer.
+ * which are chosen at runtime.
+*/
 
 #include "../../core/windows/SDL_windows.h"
 
+#include "SDL_hints.h"
 #include "SDL_thread.h"
+#include "SDL_timer.h"
+
+typedef SDL_sem * (*pfnSDL_CreateSemaphore)(Uint32); /* same shape as SDL_CreateSemaphore() */
+typedef void (*pfnSDL_DestroySemaphore)(SDL_sem *); /* same shape as SDL_DestroySemaphore() */
+typedef int (*pfnSDL_SemWaitTimeout)(SDL_sem *, Uint32); /* same shape as SDL_SemWaitTimeout() */
+typedef int (*pfnSDL_SemTryWait)(SDL_sem *); /* same shape as SDL_SemTryWait() */
+typedef int (*pfnSDL_SemWait)(SDL_sem *); /* same shape as SDL_SemWait() */
+typedef Uint32 (*pfnSDL_SemValue)(SDL_sem *); /* same shape as SDL_SemValue() */
+typedef int (*pfnSDL_SemPost)(SDL_sem *); /* same shape as SDL_SemPost() */
 
-struct SDL_semaphore
+typedef struct SDL_semaphore_impl_t /* entry points of one semaphore backend */
 {
-    HANDLE id;
+    pfnSDL_CreateSemaphore  Create; /* target of SDL_CreateSemaphore() */
+    pfnSDL_DestroySemaphore Destroy; /* target of SDL_DestroySemaphore() */
+    pfnSDL_SemWaitTimeout   WaitTimeout; /* target of SDL_SemWaitTimeout() */
+    pfnSDL_SemTryWait       TryWait; /* target of SDL_SemTryWait() */
+    pfnSDL_SemWait          Wait; /* target of SDL_SemWait() */
+    pfnSDL_SemValue         Value; /* target of SDL_SemValue() */
+    pfnSDL_SemPost          Post; /* target of SDL_SemPost() */
+} SDL_sem_impl_t;
+
+/* Active backend; all members stay NULL until the first SDL_CreateSemaphore() call picks one based on available Kernel features */
+static SDL_sem_impl_t SDL_sem_impl_active = {0};
+
+
+/**
+ * Atomic + WaitOnAddress implementation
+ */
+
+typedef BOOL(WINAPI *pfnWaitOnAddress)(volatile VOID*, PVOID, SIZE_T, DWORD); /* pointer type for the "WaitOnAddress" export */
+typedef VOID(WINAPI *pfnWakeByAddressSingle)(PVOID); /* pointer type for the "WakeByAddressSingle" export */
+
+static pfnWaitOnAddress pWaitOnAddress = NULL; /* filled in by SDL_CreateSemaphore() via GetProcAddress() */
+static pfnWakeByAddressSingle pWakeByAddressSingle = NULL; /* filled in by SDL_CreateSemaphore() via GetProcAddress() */
+
+typedef struct SDL_semaphore_atom /* semaphore state for the atomics backend */
+{
+    LONG count; /* current value; after creation it is modified only through Interlocked* calls */
+} SDL_sem_atom;
+
+static SDL_sem * /* returns the new semaphore, or NULL after SDL_OutOfMemory() */
+SDL_CreateSemaphore_atom(Uint32 initial_value)
+{
+    SDL_sem_atom *sem;
+
+    sem = (SDL_sem_atom *) SDL_malloc(sizeof(*sem));
+    if (sem) {
+        sem->count = initial_value; /* Uint32 stored into a LONG field */
+    } else {
+        SDL_OutOfMemory();
+    }
+    return (SDL_sem *)sem; /* handed out as the opaque public type */
+}
+
+static void /* frees a semaphore made by SDL_CreateSemaphore_atom(); NULL is ignored */
+SDL_DestroySemaphore_atom(SDL_sem * sem)
+{
+    if (sem) {
+        SDL_free(sem); /* the struct owns no other resources */
+    }
+}
+
+static int /* 0 if one unit was taken, SDL_MUTEX_TIMEDOUT if not, SDL_SetError() result for NULL */
+SDL_SemTryWait_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
     LONG count;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    count = sem->count; /* snapshot, used as the compare value below */
+    if (count == 0) {
+        return SDL_MUTEX_TIMEDOUT; /* nothing available */
+    }
+
+    if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
+        return 0; /* count still matched the snapshot, so the decrement was stored */
+    }
+
+    return SDL_MUTEX_TIMEDOUT; /* NOTE(review): also reached when the count merely changed since the snapshot (no retry), even if it is still non-zero */
+}
+
+static int /* blocks until one unit is taken; 0 on success, SDL_SetError() result on failure */
+SDL_SemWait_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+    LONG count;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    for (;;) { /* repeat until the compare-exchange below succeeds */
+        count = sem->count;
+        while (count == 0) { /* re-check after every wake-up */
+            if (pWaitOnAddress(&sem->count, &count, sizeof(sem->count), INFINITE) == FALSE) {
+                return SDL_SetError("WaitOnAddress() failed");
+            }
+            count = sem->count; /* refresh the snapshot after waking */
+        }
+
+        if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
+            return 0; /* decrement stored: one unit consumed */
+        }
+    }
+}
+
+/* Timed wait: 0 on success, SDL_MUTEX_TIMEDOUT on timeout, else the SDL_SetError() result. */
+static int
+SDL_SemWaitTimeout_atom(SDL_sem * _sem, Uint32 timeout)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+    LONG count;
+    Uint32 start;
+    Uint32 elapsed;
+    DWORD timeout_eff;
+
+    if (timeout == SDL_MUTEX_MAXWAIT) {
+        return SDL_SemWait_atom(_sem);
+    }
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    /**
+     * WaitOnAddress is subject to spurious and stolen wakeups so we
+     * need to recalculate the effective timeout before every wait.
+     * Unsigned `now - start` stays correct when the tick counter wraps.
+     */
+    start = SDL_GetTicks();
+
+    for (;;) {
+        count = sem->count;
+        /* If no semaphore is available we need to wait */
+        while (count == 0) {
+            elapsed = SDL_GetTicks() - start;
+            if (elapsed >= timeout) {
+                return SDL_MUTEX_TIMEDOUT;
+            }
+            timeout_eff = (DWORD) (timeout - elapsed);
+            if (pWaitOnAddress(&sem->count, &count, sizeof(count), timeout_eff) == FALSE) {
+                if (GetLastError() == ERROR_TIMEOUT) {
+                    return SDL_MUTEX_TIMEDOUT;
+                }
+                return SDL_SetError("WaitOnAddress() failed");
+            }
+            count = sem->count;
+        }
+
+        /* The semaphore is only consumed if this exchange succeeds; */
+        /* if the count changed in the meantime we start over. */
+        if (InterlockedCompareExchange(&sem->count, count - 1, count) == count) {
+            return 0;
+        }
+    }
+}
+
+static Uint32 /* current count, or 0 (with an SDL error set) for a NULL semaphore */
+SDL_SemValue_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+
+    if (!sem) {
+        SDL_SetError("Passed a NULL sem");
+        return 0; /* same value as an empty semaphore; the error is only visible via SDL's error state */
+    }
+
+    return (Uint32)sem->count; /* plain read of the field, cast back from LONG */
+}
+
+static int /* adds one unit; 0 on success, SDL_SetError() result for NULL */
+SDL_SemPost_atom(SDL_sem * _sem)
+{
+    SDL_sem_atom *sem = (SDL_sem_atom *)_sem;
+
+    if (!sem) {
+        return SDL_SetError("Passed a NULL sem");
+    }
+
+    InterlockedIncrement(&sem->count); /* publish the new unit first */
+    pWakeByAddressSingle(&sem->count); /* then signal the address the wait functions block on */
+
+    return 0;
+}
+
+static const SDL_sem_impl_t SDL_sem_impl_atom = /* atomics backend; entries follow SDL_sem_impl_t member order */
+{
+    &SDL_CreateSemaphore_atom, /* Create */
+    &SDL_DestroySemaphore_atom, /* Destroy */
+    &SDL_SemWaitTimeout_atom, /* WaitTimeout */
+    &SDL_SemTryWait_atom, /* TryWait */
+    &SDL_SemWait_atom, /* Wait */
+    &SDL_SemValue_atom, /* Value */
+    &SDL_SemPost_atom, /* Post */
 };
 
 
+/**
+ * Fallback Semaphore implementation using Kernel Semaphores
+ */
+
+typedef struct SDL_semaphore_kern /* semaphore state for the kernel-object backend */
+{
+    HANDLE id; /* handle released with CloseHandle() in SDL_DestroySemaphore_kern() */
+    LONG count; /* value reported by SDL_SemValue_kern() */
+} SDL_sem_kern;
+
 /* Create a semaphore */
-SDL_sem *
-SDL_CreateSemaphore(Uint32 initial_value)
+static SDL_sem *
+SDL_CreateSemaphore_kern(Uint32 initial_value)
 {
-    SDL_sem *sem;
+    SDL_sem_kern *sem;
 
     /* Allocate sem memory */
-    sem = (SDL_sem *) SDL_malloc(sizeof(*sem));
+    sem = (SDL_sem_kern *) SDL_malloc(sizeof(*sem));
     if (sem) {
         /* Create the semaphore, with max value 32K */
 #if __WINRT__
@@ -59,13 +271,14 @@ SDL_CreateSemaphore(Uint32 initial_value)
     } else {
         SDL_OutOfMemory();
     }
-    return (sem);
+    return (SDL_sem *)sem;
 }
 
 /* Free the semaphore */
-void
-SDL_DestroySemaphore(SDL_sem * sem)
+static void
+SDL_DestroySemaphore_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (sem) {
         if (sem->id) {
             CloseHandle(sem->id);
@@ -75,9 +288,10 @@ SDL_DestroySemaphore(SDL_sem * sem)
     }
 }
 
-int
-SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
+static int
+SDL_SemWaitTimeout_kern(SDL_sem * _sem, Uint32 timeout)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     int retval;
     DWORD dwMilliseconds;
 
@@ -105,22 +319,23 @@ SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
     return retval;
 }
 
-int
-SDL_SemTryWait(SDL_sem * sem)
+static int
+SDL_SemTryWait_kern(SDL_sem * sem)
 {
-    return SDL_SemWaitTimeout(sem, 0);
+    return SDL_SemWaitTimeout_kern(sem, 0);
 }
 
-int
-SDL_SemWait(SDL_sem * sem)
+static int
+SDL_SemWait_kern(SDL_sem * sem)
 {
-    return SDL_SemWaitTimeout(sem, SDL_MUTEX_MAXWAIT);
+    return SDL_SemWaitTimeout_kern(sem, SDL_MUTEX_MAXWAIT);
 }
 
 /* Returns the current count of the semaphore */
-Uint32
-SDL_SemValue(SDL_sem * sem)
+static Uint32
+SDL_SemValue_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (!sem) {
         SDL_SetError("Passed a NULL sem");
         return 0;
@@ -128,9 +343,10 @@ SDL_SemValue(SDL_sem * sem)
     return (Uint32)sem->count;
 }
 
-int
-SDL_SemPost(SDL_sem * sem)
+static int
+SDL_SemPost_kern(SDL_sem * _sem)
 {
+    SDL_sem_kern *sem = (SDL_sem_kern *)_sem;
     if (!sem) {
         return SDL_SetError("Passed a NULL sem");
     }
@@ -147,6 +363,90 @@ SDL_SemPost(SDL_sem * sem)
     return 0;
 }
 
+static const SDL_sem_impl_t SDL_sem_impl_kern = /* kernel-object backend; entries follow SDL_sem_impl_t member order */
+{
+    &SDL_CreateSemaphore_kern, /* Create */
+    &SDL_DestroySemaphore_kern, /* Destroy */
+    &SDL_SemWaitTimeout_kern, /* WaitTimeout */
+    &SDL_SemTryWait_kern, /* TryWait */
+    &SDL_SemWait_kern, /* Wait */
+    &SDL_SemValue_kern, /* Value */
+    &SDL_SemPost_kern, /* Post */
+};
+
+
+/**
+ * Runtime selection and redirection
+ */
+
+SDL_sem * /* creates a semaphore; the first call also selects the backend used by every SDL_Sem* function */
+SDL_CreateSemaphore(Uint32 initial_value)
+{
+    if (SDL_sem_impl_active.Create == NULL) { /* NOTE(review): unsynchronized one-time init - confirm the first call cannot race */
+        /* Default to fallback implementation */
+        const SDL_sem_impl_t * impl = &SDL_sem_impl_kern;
+
+        if (!SDL_GetHintBoolean(SDL_HINT_WINDOWS_FORCE_SEMAPHORE_KERNEL, SDL_FALSE)) {
+            /* We already statically link to features from this API
+             * Set (e.g. WaitForSingleObject). Dynamically loading
+             * API Sets is not explicitly documented but according to
+             * Microsoft our specific use case is legal and correct:
+             * https://github.com/microsoft/STL/pull/593#issuecomment-655799859
+             */
+            HMODULE synch120 = GetModuleHandleW(L"api-ms-win-core-synch-l1-2-0.dll");
+            if (synch120) {
+                /* Try to load required functions provided by Win 8 or newer */
+                pWaitOnAddress = (pfnWaitOnAddress) GetProcAddress(synch120, "WaitOnAddress");
+                pWakeByAddressSingle = (pfnWakeByAddressSingle) GetProcAddress(synch120, "WakeByAddressSingle");
+
+                if(pWaitOnAddress && pWakeByAddressSingle) { /* both exports are required by the atomics backend */
+                    impl = &SDL_sem_impl_atom;
+                }
+            }
+        }
+
+        /* Copy instead of using pointer to save one level of indirection */
+        SDL_memcpy(&SDL_sem_impl_active, impl, sizeof(SDL_sem_impl_active));
+    }
+    return SDL_sem_impl_active.Create(initial_value);
+}
+
+void /* Destroys `sem`. NULL is ignored, which also avoids calling an unset backend pointer. */
+SDL_DestroySemaphore(SDL_sem * sem)
+{
+    if (sem) { SDL_sem_impl_active.Destroy(sem); } /* non-NULL implies a backend was selected */
+}
+
+int /* forwards to the backend selected by SDL_CreateSemaphore() */
+SDL_SemWaitTimeout(SDL_sem * sem, Uint32 timeout)
+{
+    return SDL_sem_impl_active.WaitTimeout(sem, timeout); /* NOTE(review): pointer is NULL if no semaphore was ever created - confirm this cannot be reached first */
+}
+
+int /* forwards to the backend selected by SDL_CreateSemaphore() */
+SDL_SemTryWait(SDL_sem * sem)
+{
+    return SDL_sem_impl_active.TryWait(sem); /* NOTE(review): pointer is NULL if no semaphore was ever created - confirm this cannot be reached first */
+}
+
+int /* forwards to the backend selected by SDL_CreateSemaphore() */
+SDL_SemWait(SDL_sem * sem)
+{
+    return SDL_sem_impl_active.Wait(sem); /* NOTE(review): pointer is NULL if no semaphore was ever created - confirm this cannot be reached first */
+}
+
+Uint32 /* forwards to the backend selected by SDL_CreateSemaphore() */
+SDL_SemValue(SDL_sem * sem)
+{
+    return SDL_sem_impl_active.Value(sem); /* NOTE(review): pointer is NULL if no semaphore was ever created - confirm this cannot be reached first */
+}
+
+int /* forwards to the backend selected by SDL_CreateSemaphore() */
+SDL_SemPost(SDL_sem * sem)
+{
+    return SDL_sem_impl_active.Post(sem); /* NOTE(review): pointer is NULL if no semaphore was ever created - confirm this cannot be reached first */
+}
+
 #endif /* SDL_THREAD_WINDOWS */
 
 /* vi: set ts=4 sw=4 expandtab: */