Parcourir la source

Added SDL_StepBackUTF8()

Sam Lantinga il y a 6 mois
Parent
commit
f8eac30276

+ 31 - 1
include/SDL3/SDL_stdinc.h

@@ -2472,13 +2472,14 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea
 /**
  * The Unicode REPLACEMENT CHARACTER codepoint.
  *
- * SDL_StepUTF8() reports this codepoint when it encounters a UTF-8 string
+ * SDL_StepUTF8() and SDL_StepBackUTF8() report this codepoint when they encounter a UTF-8 string
  * with encoding errors.
  *
  * This tends to render as something like a question mark in most places.
  *
  * \since This macro is available since SDL 3.0.0.
  *
+ * \sa SDL_StepBackUTF8
  * \sa SDL_StepUTF8
  */
 #define SDL_INVALID_UNICODE_CODEPOINT 0xFFFD
@@ -2528,6 +2529,35 @@ extern SDL_DECLSPEC char * SDLCALL SDL_strpbrk(const char *str, const char *brea
  */
 extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepUTF8(const char **pstr, size_t *pslen);
 
+/**
+ * Decode a UTF-8 string in reverse, one Unicode codepoint at a time.
+ *
+ * This will go to the start of the previous Unicode codepoint in the string, move `*pstr` to that location and return that codepoint.
+ *
+ * If the resulting codepoint is zero (already at the start of the string), it will not move `*pstr` at all.
+ *
+ * Generally this function is called in a loop until it returns zero,
+ * adjusting its parameter each iteration.
+ *
+ * If an invalid UTF-8 sequence is encountered, this function returns
+ * SDL_INVALID_UNICODE_CODEPOINT.
+ *
+ * Several things can generate invalid UTF-8 sequences, including overlong
+ * encodings, the use of UTF-16 surrogate values, and truncated data. Please
+ * refer to
+ * [RFC3629](https://www.ietf.org/rfc/rfc3629.txt)
+ * for details.
+ *
+ * \param start a pointer to the beginning of the UTF-8 string.
+ * \param pstr a pointer to a UTF-8 string pointer to be read and adjusted.
+ * \returns the previous Unicode codepoint in the string, or zero if `*pstr` is already at `start`.
+ *
+ * \threadsafety It is safe to call this function from any thread.
+ *
+ * \since This function is available since SDL 3.0.0.
+ */
+extern SDL_DECLSPEC Uint32 SDLCALL SDL_StepBackUTF8(const char *start, const char **pstr);
+
 /**
  * Convert a single Unicode codepoint to UTF-8.
  *

+ 1 - 0
src/dynapi/SDL_dynapi.sym

@@ -1176,6 +1176,7 @@ SDL3_0.0.0 {
     SDL_wcsnstr;
     SDL_wcsstr;
     SDL_wcstol;
+    SDL_StepBackUTF8;
     # extra symbols go here (don't modify this line)
   local: *;
 };

+ 1 - 0
src/dynapi/SDL_dynapi_overrides.h

@@ -1201,3 +1201,4 @@
 #define SDL_wcsnstr SDL_wcsnstr_REAL
 #define SDL_wcsstr SDL_wcsstr_REAL
 #define SDL_wcstol SDL_wcstol_REAL
+#define SDL_StepBackUTF8 SDL_StepBackUTF8_REAL

+ 1 - 0
src/dynapi/SDL_dynapi_procs.h

@@ -1207,3 +1207,4 @@ SDL_DYNAPI_PROC(size_t,SDL_wcsnlen,(const wchar_t *a, size_t b),(a,b),return)
 SDL_DYNAPI_PROC(wchar_t*,SDL_wcsnstr,(const wchar_t *a, const wchar_t *b, size_t c),(a,b,c),return)
 SDL_DYNAPI_PROC(wchar_t*,SDL_wcsstr,(const wchar_t *a, const wchar_t *b),(a,b),return)
 SDL_DYNAPI_PROC(long,SDL_wcstol,(const wchar_t *a, wchar_t **b, int c),(a,b,c),return)
+SDL_DYNAPI_PROC(Uint32,SDL_StepBackUTF8,(const char *a, const char **b),(a,b),return)

+ 20 - 0
src/stdlib/SDL_string.c

@@ -265,6 +265,26 @@ Uint32 SDL_StepUTF8(const char **pstr, size_t *pslen)
     return result;
 }
 
+Uint32 SDL_StepBackUTF8(const char *start, const char **pstr)
+{ // Moves *pstr back to the start of the previous UTF-8 sequence and returns the codepoint found there.
+    if (!pstr || *pstr <= start) { // no cursor, or cursor already at/before `start`: nothing to step over
+        return 0; // *pstr is left untouched on this path
+    }
+
+    // Step back over the previous UTF-8 character: skip continuation bytes until a non-continuation byte (or `start`) is reached
+    const char *str = *pstr;
+    do {
+        if (str == start) { // never read before the beginning of the buffer
+            break;
+        }
+        --str;
+    } while ((*str & 0xC0) == 0x80); // top bits 10xxxxxx mark a UTF-8 continuation byte
+    // NOTE(review): the walk above is not capped at 3 continuation bytes, so a long run of stray 10xxxxxx bytes is crossed in one call
+    size_t length = (*pstr - str); // number of bytes between the new position and the old cursor
+    *pstr = str; // publish the new cursor before decoding
+    return StepUTF8(&str, length); // helper defined outside this view; it is handed the local `str`, so it cannot move *pstr
+}
+
 #if (SDL_SIZEOF_WCHAR_T == 2)
 static Uint32 StepUTF16(const Uint16 **_str, const size_t slen)
 {