test-idna.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. /* Copyright The libuv project and contributors. All rights reserved.
  2. *
  3. * Permission is hereby granted, free of charge, to any person obtaining a copy
  4. * of this software and associated documentation files (the "Software"), to
  5. * deal in the Software without restriction, including without limitation the
  6. * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  7. * sell copies of the Software, and to permit persons to whom the Software is
  8. * furnished to do so, subject to the following conditions:
  9. *
  10. * The above copyright notice and this permission notice shall be included in
  11. * all copies or substantial portions of the Software.
  12. *
  13. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  14. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  15. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  16. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  17. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  18. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  19. * IN THE SOFTWARE.
  20. */
  21. #include "task.h"
  22. #include "../src/idna.c"
  23. #include <string.h>
  24. TEST_IMPL(utf8_decode1) {
  25. const char* p;
  26. char b[32];
  27. int i;
  28. /* ASCII. */
  29. p = b;
  30. snprintf(b, sizeof(b), "%c\x7F", 0x00);
  31. ASSERT(0 == uv__utf8_decode1(&p, b + sizeof(b)));
  32. ASSERT(p == b + 1);
  33. ASSERT(127 == uv__utf8_decode1(&p, b + sizeof(b)));
  34. ASSERT(p == b + 2);
  35. /* Two-byte sequences. */
  36. p = b;
  37. snprintf(b, sizeof(b), "\xC2\x80\xDF\xBF");
  38. ASSERT(128 == uv__utf8_decode1(&p, b + sizeof(b)));
  39. ASSERT(p == b + 2);
  40. ASSERT(0x7FF == uv__utf8_decode1(&p, b + sizeof(b)));
  41. ASSERT(p == b + 4);
  42. /* Three-byte sequences. */
  43. p = b;
  44. snprintf(b, sizeof(b), "\xE0\xA0\x80\xEF\xBF\xBF");
  45. ASSERT(0x800 == uv__utf8_decode1(&p, b + sizeof(b)));
  46. ASSERT(p == b + 3);
  47. ASSERT(0xFFFF == uv__utf8_decode1(&p, b + sizeof(b)));
  48. ASSERT(p == b + 6);
  49. /* Four-byte sequences. */
  50. p = b;
  51. snprintf(b, sizeof(b), "\xF0\x90\x80\x80\xF4\x8F\xBF\xBF");
  52. ASSERT(0x10000 == uv__utf8_decode1(&p, b + sizeof(b)));
  53. ASSERT(p == b + 4);
  54. ASSERT(0x10FFFF == uv__utf8_decode1(&p, b + sizeof(b)));
  55. ASSERT(p == b + 8);
  56. /* Four-byte sequences > U+10FFFF; disallowed. */
  57. p = b;
  58. snprintf(b, sizeof(b), "\xF4\x90\xC0\xC0\xF7\xBF\xBF\xBF");
  59. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  60. ASSERT(p == b + 4);
  61. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  62. ASSERT(p == b + 8);
  63. /* Overlong; disallowed. */
  64. p = b;
  65. snprintf(b, sizeof(b), "\xC0\x80\xC1\x80");
  66. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  67. ASSERT(p == b + 2);
  68. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  69. ASSERT(p == b + 4);
  70. /* Surrogate pairs; disallowed. */
  71. p = b;
  72. snprintf(b, sizeof(b), "\xED\xA0\x80\xED\xA3\xBF");
  73. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  74. ASSERT(p == b + 3);
  75. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  76. ASSERT(p == b + 6);
  77. /* Simply illegal. */
  78. p = b;
  79. snprintf(b, sizeof(b), "\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF");
  80. for (i = 1; i <= 8; i++) {
  81. ASSERT((unsigned) -1 == uv__utf8_decode1(&p, b + sizeof(b)));
  82. ASSERT(p == b + i);
  83. }
  84. return 0;
  85. }
  86. /* Doesn't work on z/OS because that platform uses EBCDIC, not ASCII. */
  87. #ifndef __MVS__
  88. #define F(input, err) \
  89. do { \
  90. char d[256] = {0}; \
  91. static const char s[] = "" input ""; \
  92. ASSERT(err == uv__idna_toascii(s, s + sizeof(s) - 1, d, d + sizeof(d))); \
  93. } while (0)
  94. #define T(input, expected) \
  95. do { \
  96. long n; \
  97. char d1[256] = {0}; \
  98. char d2[256] = {0}; \
  99. static const char s[] = "" input ""; \
  100. n = uv__idna_toascii(s, s + sizeof(s) - 1, d1, d1 + sizeof(d1)); \
  101. ASSERT(n == sizeof(expected)); \
  102. ASSERT(0 == memcmp(d1, expected, n)); \
  103. /* Sanity check: encoding twice should not change the output. */ \
  104. n = uv__idna_toascii(d1, d1 + strlen(d1), d2, d2 + sizeof(d2)); \
  105. ASSERT(n == sizeof(expected)); \
  106. ASSERT(0 == memcmp(d2, expected, n)); \
  107. ASSERT(0 == memcmp(d1, d2, sizeof(d2))); \
  108. } while (0)
  109. TEST_IMPL(idna_toascii) {
  110. /* Illegal inputs. */
  111. F("\xC0\x80\xC1\x80", UV_EINVAL); /* Overlong UTF-8 sequence. */
  112. F("\xC0\x80\xC1\x80.com", UV_EINVAL); /* Overlong UTF-8 sequence. */
  113. /* No conversion. */
  114. T("", "");
  115. T(".", ".");
  116. T(".com", ".com");
  117. T("example", "example");
  118. T("example-", "example-");
  119. T("straße.de", "xn--strae-oqa.de");
  120. /* Test cases adapted from punycode.js. Most are from RFC 3492. */
  121. T("foo.bar", "foo.bar");
  122. T("mañana.com", "xn--maana-pta.com");
  123. T("example.com.", "example.com.");
  124. T("bücher.com", "xn--bcher-kva.com");
  125. T("café.com", "xn--caf-dma.com");
  126. T("café.café.com", "xn--caf-dma.xn--caf-dma.com");
  127. T("☃-⌘.com", "xn----dqo34k.com");
  128. T("퐀☃-⌘.com", "xn----dqo34kn65z.com");
  129. T("💩.la", "xn--ls8h.la");
  130. T("mañana.com", "xn--maana-pta.com");
  131. T("mañana。com", "xn--maana-pta.com");
  132. T("mañana.com", "xn--maana-pta.com");
  133. T("mañana。com", "xn--maana-pta.com");
  134. T("ü", "xn--tda");
  135. T(".ü", ".xn--tda");
  136. T("ü.ü", "xn--tda.xn--tda");
  137. T("ü.ü.", "xn--tda.xn--tda.");
  138. T("üëäö♥", "xn--4can8av2009b");
  139. T("Willst du die Blüthe des frühen, die Früchte des späteren Jahres",
  140. "xn--Willst du die Blthe des frhen, "
  141. "die Frchte des spteren Jahres-x9e96lkal");
  142. T("ليهمابتكلموشعربي؟", "xn--egbpdaj6bu4bxfgehfvwxn");
  143. T("他们为什么不说中文", "xn--ihqwcrb4cv8a8dqg056pqjye");
  144. T("他們爲什麽不說中文", "xn--ihqwctvzc91f659drss3x8bo0yb");
  145. T("Pročprostěnemluvíčesky", "xn--Proprostnemluvesky-uyb24dma41a");
  146. T("למההםפשוטלאמדבריםעברית", "xn--4dbcagdahymbxekheh6e0a7fei0b");
  147. T("यहलोगहिन्दीक्योंनहींबोलसकतेहैं",
  148. "xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd");
  149. T("なぜみんな日本語を話してくれないのか",
  150. "xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa");
  151. T("세계의모든사람들이한국어를이해한다면얼마나좋을까",
  152. "xn--989aomsvi5e83db1d2a355cv1e0vak1d"
  153. "wrv93d5xbh15a0dt30a5jpsd879ccm6fea98c");
  154. T("почемужеонинеговорятпорусски", "xn--b1abfaaepdrnnbgefbadotcwatmq2g4l");
  155. T("PorquénopuedensimplementehablarenEspañol",
  156. "xn--PorqunopuedensimplementehablarenEspaol-fmd56a");
  157. T("TạisaohọkhôngthểchỉnóitiếngViệt",
  158. "xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g");
  159. T("3年B組金八先生", "xn--3B-ww4c5e180e575a65lsy2b");
  160. T("安室奈美恵-with-SUPER-MONKEYS",
  161. "xn---with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n");
  162. T("Hello-Another-Way-それぞれの場所",
  163. "xn--Hello-Another-Way--fc4qua05auwb3674vfr0b");
  164. T("ひとつ屋根の下2", "xn--2-u9tlzr9756bt3uc0v");
  165. T("MajiでKoiする5秒前", "xn--MajiKoi5-783gue6qz075azm5e");
  166. T("パフィーdeルンバ", "xn--de-jg4avhby1noc0d");
  167. T("そのスピードで", "xn--d9juau41awczczp");
  168. T("-> $1.00 <-", "-> $1.00 <-");
  169. /* Test cases from https://unicode.org/reports/tr46/ */
  170. T("faß.de", "xn--fa-hia.de");
  171. T("βόλος.com", "xn--nxasmm1c.com");
  172. T("ශ්‍රී.com", "xn--10cl1a0b660p.com");
  173. T("نامه‌ای.com", "xn--mgba3gch31f060k.com");
  174. return 0;
  175. }
  176. #undef T
  177. #endif /* __MVS__ */