escaping_test.cc 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/strings/escaping.h"
  15. #include <array>
  16. #include <cstdio>
  17. #include <cstring>
  18. #include <memory>
  19. #include <vector>
  20. #include "gmock/gmock.h"
  21. #include "gtest/gtest.h"
  22. #include "absl/container/fixed_array.h"
  23. #include "absl/strings/str_cat.h"
  24. #include "absl/strings/internal/escaping_test_common.h"
  25. namespace {
// Pairs an escaped string with the raw byte sequence it stands for. The tests
// below initialise it positionally as {escaped, unescaped}.
struct epair {
  std::string escaped;    // Text containing backslash escape sequences.
  std::string unescaped;  // Raw bytes that `escaped` represents.
};
  30. TEST(CEscape, EscapeAndUnescape) {
  31. const std::string inputs[] = {
  32. std::string("foo\nxx\r\b\0023"),
  33. std::string(""),
  34. std::string("abc"),
  35. std::string("\1chad_rules"),
  36. std::string("\1arnar_drools"),
  37. std::string("xxxx\r\t'\"\\"),
  38. std::string("\0xx\0", 4),
  39. std::string("\x01\x31"),
  40. std::string("abc\xb\x42\141bc"),
  41. std::string("123\1\x31\x32\x33"),
  42. std::string("\xc1\xca\x1b\x62\x19o\xcc\x04"),
  43. std::string(
  44. "\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name"),
  45. };
  46. // Do this twice, once for octal escapes and once for hex escapes.
  47. for (int kind = 0; kind < 4; kind++) {
  48. for (const std::string& original : inputs) {
  49. std::string escaped;
  50. switch (kind) {
  51. case 0:
  52. escaped = absl::CEscape(original);
  53. break;
  54. case 1:
  55. escaped = absl::CHexEscape(original);
  56. break;
  57. case 2:
  58. escaped = absl::Utf8SafeCEscape(original);
  59. break;
  60. case 3:
  61. escaped = absl::Utf8SafeCHexEscape(original);
  62. break;
  63. }
  64. std::string unescaped_str;
  65. EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str));
  66. EXPECT_EQ(unescaped_str, original);
  67. unescaped_str.erase();
  68. std::string error;
  69. EXPECT_TRUE(absl::CUnescape(escaped, &unescaped_str, &error));
  70. EXPECT_EQ(error, "");
  71. // Check in-place unescaping
  72. std::string s = escaped;
  73. EXPECT_TRUE(absl::CUnescape(s, &s));
  74. ASSERT_EQ(s, original);
  75. }
  76. }
  77. // Check that all possible two character strings can be escaped then
  78. // unescaped successfully.
  79. for (int char0 = 0; char0 < 256; char0++) {
  80. for (int char1 = 0; char1 < 256; char1++) {
  81. char chars[2];
  82. chars[0] = char0;
  83. chars[1] = char1;
  84. std::string s(chars, 2);
  85. std::string escaped = absl::CHexEscape(s);
  86. std::string unescaped;
  87. EXPECT_TRUE(absl::CUnescape(escaped, &unescaped));
  88. EXPECT_EQ(s, unescaped);
  89. }
  90. }
  91. }
  92. TEST(CEscape, BasicEscaping) {
  93. epair oct_values[] = {
  94. {"foo\\rbar\\nbaz\\t", "foo\rbar\nbaz\t"},
  95. {"\\'full of \\\"sound\\\" and \\\"fury\\\"\\'",
  96. "'full of \"sound\" and \"fury\"'"},
  97. {"signi\\\\fying\\\\ nothing\\\\", "signi\\fying\\ nothing\\"},
  98. {"\\010\\t\\n\\013\\014\\r", "\010\011\012\013\014\015"}
  99. };
  100. epair hex_values[] = {
  101. {"ubik\\rubik\\nubik\\t", "ubik\rubik\nubik\t"},
  102. {"I\\\'ve just seen a \\\"face\\\"",
  103. "I've just seen a \"face\""},
  104. {"hel\\\\ter\\\\skel\\\\ter\\\\", "hel\\ter\\skel\\ter\\"},
  105. {"\\x08\\t\\n\\x0b\\x0c\\r", "\010\011\012\013\014\015"}
  106. };
  107. epair utf8_oct_values[] = {
  108. {"\xe8\xb0\xb7\xe6\xad\x8c\\r\xe8\xb0\xb7\xe6\xad\x8c\\nbaz\\t",
  109. "\xe8\xb0\xb7\xe6\xad\x8c\r\xe8\xb0\xb7\xe6\xad\x8c\nbaz\t"},
  110. {"\\\"\xe8\xb0\xb7\xe6\xad\x8c\\\" is Google\\\'s Chinese name",
  111. "\"\xe8\xb0\xb7\xe6\xad\x8c\" is Google\'s Chinese name"},
  112. {"\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\\\are\\\\Japanese\\\\chars\\\\",
  113. "\xe3\x83\xa1\xe3\x83\xbc\xe3\x83\xab\\are\\Japanese\\chars\\"},
  114. {"\xed\x81\xac\xeb\xa1\xac\\010\\t\\n\\013\\014\\r",
  115. "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  116. };
  117. epair utf8_hex_values[] = {
  118. {"\x20\xe4\xbd\xa0\\t\xe5\xa5\xbd,\\r!\\n",
  119. "\x20\xe4\xbd\xa0\t\xe5\xa5\xbd,\r!\n"},
  120. {"\xe8\xa9\xa6\xe9\xa8\x93\\\' means \\\"test\\\"",
  121. "\xe8\xa9\xa6\xe9\xa8\x93\' means \"test\""},
  122. {"\\\\\xe6\x88\x91\\\\:\\\\\xe6\x9d\xa8\xe6\xac\xa2\\\\",
  123. "\\\xe6\x88\x91\\:\\\xe6\x9d\xa8\xe6\xac\xa2\\"},
  124. {"\xed\x81\xac\xeb\xa1\xac\\x08\\t\\n\\x0b\\x0c\\r",
  125. "\xed\x81\xac\xeb\xa1\xac\010\011\012\013\014\015"}
  126. };
  127. for (const epair& val : oct_values) {
  128. std::string escaped = absl::CEscape(val.unescaped);
  129. EXPECT_EQ(escaped, val.escaped);
  130. }
  131. for (const epair& val : hex_values) {
  132. std::string escaped = absl::CHexEscape(val.unescaped);
  133. EXPECT_EQ(escaped, val.escaped);
  134. }
  135. for (const epair& val : utf8_oct_values) {
  136. std::string escaped = absl::Utf8SafeCEscape(val.unescaped);
  137. EXPECT_EQ(escaped, val.escaped);
  138. }
  139. for (const epair& val : utf8_hex_values) {
  140. std::string escaped = absl::Utf8SafeCHexEscape(val.unescaped);
  141. EXPECT_EQ(escaped, val.escaped);
  142. }
  143. }
  144. TEST(Unescape, BasicFunction) {
  145. epair tests[] =
  146. {{"", ""},
  147. {"\\u0030", "0"},
  148. {"\\u00A3", "\xC2\xA3"},
  149. {"\\u22FD", "\xE2\x8B\xBD"},
  150. {"\\U00010000", "\xF0\x90\x80\x80"},
  151. {"\\U0010FFFD", "\xF4\x8F\xBF\xBD"}};
  152. for (const epair& val : tests) {
  153. std::string out;
  154. EXPECT_TRUE(absl::CUnescape(val.escaped, &out));
  155. EXPECT_EQ(out, val.unescaped);
  156. }
  157. std::string bad[] = {"\\u1", // too short
  158. "\\U1", // too short
  159. "\\Uffffff", // exceeds 0x10ffff (largest Unicode)
  160. "\\U00110000", // exceeds 0x10ffff (largest Unicode)
  161. "\\uD835", // surrogate character (D800-DFFF)
  162. "\\U0000DD04", // surrogate character (D800-DFFF)
  163. "\\777", // exceeds 0xff
  164. "\\xABCD"}; // exceeds 0xff
  165. for (const std::string& e : bad) {
  166. std::string error;
  167. std::string out;
  168. EXPECT_FALSE(absl::CUnescape(e, &out, &error));
  169. EXPECT_FALSE(error.empty());
  170. out.erase();
  171. EXPECT_FALSE(absl::CUnescape(e, &out));
  172. }
  173. }
// Fixture for the CUnescape null-character tests. It supplies three escaped
// inputs that each contain several null escapes, plus a string that the
// individual tests use as the CUnescape output buffer.
class CUnescapeTest : public testing::Test {
 protected:
  // Escaped inputs; the definitions follow the class.
  static const char kStringWithMultipleOctalNulls[];
  static const char kStringWithMultipleHexNulls[];
  static const char kStringWithMultipleUnicodeNulls[];
  // Receives the unescaped output in each test.
  std::string result_string_;
};
// Octal null escapes of one, two and three digits, interleaved with newlines
// and a literal '0' character that must not be treated as an escape.
const char CUnescapeTest::kStringWithMultipleOctalNulls[] =
    "\\0\\n"    // null escape \0 plus newline
    "0\\n"      // just a number 0 (not a null escape) plus newline
    "\\00\\12"  // null escape \00 plus octal newline code
    "\\000";    // null escape \000
// This has the same ingredients as kStringWithMultipleOctalNulls
// but with \x hex escapes instead of octal escapes.
const char CUnescapeTest::kStringWithMultipleHexNulls[] =
    "\\x0\\n"    // null escape \x0 plus newline
    "0\\n"       // just a number 0 (not a null escape) plus newline
    "\\x00\\xa"  // null escape \x00 plus hex newline code
    "\\x000";    // null escape \x000
// Null escapes written in the 4-digit \u and 8-digit \U Unicode forms.
const char CUnescapeTest::kStringWithMultipleUnicodeNulls[] =
    "\\u0000\\n"    // short-form (4-digit) null escape plus newline
    "0\\n"          // just a number 0 (not a null escape) plus newline
    "\\U00000000";  // long-form (8-digit) null escape
  197. TEST_F(CUnescapeTest, Unescapes1CharOctalNull) {
  198. std::string original_string = "\\0";
  199. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  200. EXPECT_EQ(std::string("\0", 1), result_string_);
  201. }
  202. TEST_F(CUnescapeTest, Unescapes2CharOctalNull) {
  203. std::string original_string = "\\00";
  204. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  205. EXPECT_EQ(std::string("\0", 1), result_string_);
  206. }
  207. TEST_F(CUnescapeTest, Unescapes3CharOctalNull) {
  208. std::string original_string = "\\000";
  209. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  210. EXPECT_EQ(std::string("\0", 1), result_string_);
  211. }
  212. TEST_F(CUnescapeTest, Unescapes1CharHexNull) {
  213. std::string original_string = "\\x0";
  214. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  215. EXPECT_EQ(std::string("\0", 1), result_string_);
  216. }
  217. TEST_F(CUnescapeTest, Unescapes2CharHexNull) {
  218. std::string original_string = "\\x00";
  219. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  220. EXPECT_EQ(std::string("\0", 1), result_string_);
  221. }
  222. TEST_F(CUnescapeTest, Unescapes3CharHexNull) {
  223. std::string original_string = "\\x000";
  224. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  225. EXPECT_EQ(std::string("\0", 1), result_string_);
  226. }
  227. TEST_F(CUnescapeTest, Unescapes4CharUnicodeNull) {
  228. std::string original_string = "\\u0000";
  229. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  230. EXPECT_EQ(std::string("\0", 1), result_string_);
  231. }
  232. TEST_F(CUnescapeTest, Unescapes8CharUnicodeNull) {
  233. std::string original_string = "\\U00000000";
  234. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  235. EXPECT_EQ(std::string("\0", 1), result_string_);
  236. }
  237. TEST_F(CUnescapeTest, UnescapesMultipleOctalNulls) {
  238. std::string original_string(kStringWithMultipleOctalNulls);
  239. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  240. // All escapes, including newlines and null escapes, should have been
  241. // converted to the equivalent characters.
  242. EXPECT_EQ(std::string("\0\n"
  243. "0\n"
  244. "\0\n"
  245. "\0",
  246. 7),
  247. result_string_);
  248. }
  249. TEST_F(CUnescapeTest, UnescapesMultipleHexNulls) {
  250. std::string original_string(kStringWithMultipleHexNulls);
  251. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  252. EXPECT_EQ(std::string("\0\n"
  253. "0\n"
  254. "\0\n"
  255. "\0",
  256. 7),
  257. result_string_);
  258. }
  259. TEST_F(CUnescapeTest, UnescapesMultipleUnicodeNulls) {
  260. std::string original_string(kStringWithMultipleUnicodeNulls);
  261. EXPECT_TRUE(absl::CUnescape(original_string, &result_string_));
  262. EXPECT_EQ(std::string("\0\n"
  263. "0\n"
  264. "\0",
  265. 5),
  266. result_string_);
  267. }
  268. static struct {
  269. absl::string_view plaintext;
  270. absl::string_view cyphertext;
  271. } const base64_tests[] = {
  272. // Empty string.
  273. {{"", 0}, {"", 0}},
  274. {{nullptr, 0},
  275. {"", 0}}, // if length is zero, plaintext ptr must be ignored!
  276. // Basic bit patterns;
  277. // values obtained with "echo -n '...' | uuencode -m test"
  278. {{"\000", 1}, "AA=="},
  279. {{"\001", 1}, "AQ=="},
  280. {{"\002", 1}, "Ag=="},
  281. {{"\004", 1}, "BA=="},
  282. {{"\010", 1}, "CA=="},
  283. {{"\020", 1}, "EA=="},
  284. {{"\040", 1}, "IA=="},
  285. {{"\100", 1}, "QA=="},
  286. {{"\200", 1}, "gA=="},
  287. {{"\377", 1}, "/w=="},
  288. {{"\376", 1}, "/g=="},
  289. {{"\375", 1}, "/Q=="},
  290. {{"\373", 1}, "+w=="},
  291. {{"\367", 1}, "9w=="},
  292. {{"\357", 1}, "7w=="},
  293. {{"\337", 1}, "3w=="},
  294. {{"\277", 1}, "vw=="},
  295. {{"\177", 1}, "fw=="},
  296. {{"\000\000", 2}, "AAA="},
  297. {{"\000\001", 2}, "AAE="},
  298. {{"\000\002", 2}, "AAI="},
  299. {{"\000\004", 2}, "AAQ="},
  300. {{"\000\010", 2}, "AAg="},
  301. {{"\000\020", 2}, "ABA="},
  302. {{"\000\040", 2}, "ACA="},
  303. {{"\000\100", 2}, "AEA="},
  304. {{"\000\200", 2}, "AIA="},
  305. {{"\001\000", 2}, "AQA="},
  306. {{"\002\000", 2}, "AgA="},
  307. {{"\004\000", 2}, "BAA="},
  308. {{"\010\000", 2}, "CAA="},
  309. {{"\020\000", 2}, "EAA="},
  310. {{"\040\000", 2}, "IAA="},
  311. {{"\100\000", 2}, "QAA="},
  312. {{"\200\000", 2}, "gAA="},
  313. {{"\377\377", 2}, "//8="},
  314. {{"\377\376", 2}, "//4="},
  315. {{"\377\375", 2}, "//0="},
  316. {{"\377\373", 2}, "//s="},
  317. {{"\377\367", 2}, "//c="},
  318. {{"\377\357", 2}, "/+8="},
  319. {{"\377\337", 2}, "/98="},
  320. {{"\377\277", 2}, "/78="},
  321. {{"\377\177", 2}, "/38="},
  322. {{"\376\377", 2}, "/v8="},
  323. {{"\375\377", 2}, "/f8="},
  324. {{"\373\377", 2}, "+/8="},
  325. {{"\367\377", 2}, "9/8="},
  326. {{"\357\377", 2}, "7/8="},
  327. {{"\337\377", 2}, "3/8="},
  328. {{"\277\377", 2}, "v/8="},
  329. {{"\177\377", 2}, "f/8="},
  330. {{"\000\000\000", 3}, "AAAA"},
  331. {{"\000\000\001", 3}, "AAAB"},
  332. {{"\000\000\002", 3}, "AAAC"},
  333. {{"\000\000\004", 3}, "AAAE"},
  334. {{"\000\000\010", 3}, "AAAI"},
  335. {{"\000\000\020", 3}, "AAAQ"},
  336. {{"\000\000\040", 3}, "AAAg"},
  337. {{"\000\000\100", 3}, "AABA"},
  338. {{"\000\000\200", 3}, "AACA"},
  339. {{"\000\001\000", 3}, "AAEA"},
  340. {{"\000\002\000", 3}, "AAIA"},
  341. {{"\000\004\000", 3}, "AAQA"},
  342. {{"\000\010\000", 3}, "AAgA"},
  343. {{"\000\020\000", 3}, "ABAA"},
  344. {{"\000\040\000", 3}, "ACAA"},
  345. {{"\000\100\000", 3}, "AEAA"},
  346. {{"\000\200\000", 3}, "AIAA"},
  347. {{"\001\000\000", 3}, "AQAA"},
  348. {{"\002\000\000", 3}, "AgAA"},
  349. {{"\004\000\000", 3}, "BAAA"},
  350. {{"\010\000\000", 3}, "CAAA"},
  351. {{"\020\000\000", 3}, "EAAA"},
  352. {{"\040\000\000", 3}, "IAAA"},
  353. {{"\100\000\000", 3}, "QAAA"},
  354. {{"\200\000\000", 3}, "gAAA"},
  355. {{"\377\377\377", 3}, "////"},
  356. {{"\377\377\376", 3}, "///+"},
  357. {{"\377\377\375", 3}, "///9"},
  358. {{"\377\377\373", 3}, "///7"},
  359. {{"\377\377\367", 3}, "///3"},
  360. {{"\377\377\357", 3}, "///v"},
  361. {{"\377\377\337", 3}, "///f"},
  362. {{"\377\377\277", 3}, "//+/"},
  363. {{"\377\377\177", 3}, "//9/"},
  364. {{"\377\376\377", 3}, "//7/"},
  365. {{"\377\375\377", 3}, "//3/"},
  366. {{"\377\373\377", 3}, "//v/"},
  367. {{"\377\367\377", 3}, "//f/"},
  368. {{"\377\357\377", 3}, "/+//"},
  369. {{"\377\337\377", 3}, "/9//"},
  370. {{"\377\277\377", 3}, "/7//"},
  371. {{"\377\177\377", 3}, "/3//"},
  372. {{"\376\377\377", 3}, "/v//"},
  373. {{"\375\377\377", 3}, "/f//"},
  374. {{"\373\377\377", 3}, "+///"},
  375. {{"\367\377\377", 3}, "9///"},
  376. {{"\357\377\377", 3}, "7///"},
  377. {{"\337\377\377", 3}, "3///"},
  378. {{"\277\377\377", 3}, "v///"},
  379. {{"\177\377\377", 3}, "f///"},
  380. // Random numbers: values obtained with
  381. //
  382. // #! /bin/bash
  383. // dd bs=$1 count=1 if=/dev/random of=/tmp/bar.random
  384. // od -N $1 -t o1 /tmp/bar.random
  385. // uuencode -m test < /tmp/bar.random
  386. //
  387. // where $1 is the number of bytes (2, 3)
  388. {{"\243\361", 2}, "o/E="},
  389. {{"\024\167", 2}, "FHc="},
  390. {{"\313\252", 2}, "y6o="},
  391. {{"\046\041", 2}, "JiE="},
  392. {{"\145\236", 2}, "ZZ4="},
  393. {{"\254\325", 2}, "rNU="},
  394. {{"\061\330", 2}, "Mdg="},
  395. {{"\245\032", 2}, "pRo="},
  396. {{"\006\000", 2}, "BgA="},
  397. {{"\375\131", 2}, "/Vk="},
  398. {{"\303\210", 2}, "w4g="},
  399. {{"\040\037", 2}, "IB8="},
  400. {{"\261\372", 2}, "sfo="},
  401. {{"\335\014", 2}, "3Qw="},
  402. {{"\233\217", 2}, "m48="},
  403. {{"\373\056", 2}, "+y4="},
  404. {{"\247\232", 2}, "p5o="},
  405. {{"\107\053", 2}, "Rys="},
  406. {{"\204\077", 2}, "hD8="},
  407. {{"\276\211", 2}, "vok="},
  408. {{"\313\110", 2}, "y0g="},
  409. {{"\363\376", 2}, "8/4="},
  410. {{"\251\234", 2}, "qZw="},
  411. {{"\103\262", 2}, "Q7I="},
  412. {{"\142\312", 2}, "Yso="},
  413. {{"\067\211", 2}, "N4k="},
  414. {{"\220\001", 2}, "kAE="},
  415. {{"\152\240", 2}, "aqA="},
  416. {{"\367\061", 2}, "9zE="},
  417. {{"\133\255", 2}, "W60="},
  418. {{"\176\035", 2}, "fh0="},
  419. {{"\032\231", 2}, "Gpk="},
  420. {{"\013\007\144", 3}, "Cwdk"},
  421. {{"\030\112\106", 3}, "GEpG"},
  422. {{"\047\325\046", 3}, "J9Um"},
  423. {{"\310\160\022", 3}, "yHAS"},
  424. {{"\131\100\237", 3}, "WUCf"},
  425. {{"\064\342\134", 3}, "NOJc"},
  426. {{"\010\177\004", 3}, "CH8E"},
  427. {{"\345\147\205", 3}, "5WeF"},
  428. {{"\300\343\360", 3}, "wOPw"},
  429. {{"\061\240\201", 3}, "MaCB"},
  430. {{"\225\333\044", 3}, "ldsk"},
  431. {{"\215\137\352", 3}, "jV/q"},
  432. {{"\371\147\160", 3}, "+Wdw"},
  433. {{"\030\320\051", 3}, "GNAp"},
  434. {{"\044\174\241", 3}, "JHyh"},
  435. {{"\260\127\037", 3}, "sFcf"},
  436. {{"\111\045\033", 3}, "SSUb"},
  437. {{"\202\114\107", 3}, "gkxH"},
  438. {{"\057\371\042", 3}, "L/ki"},
  439. {{"\223\247\244", 3}, "k6ek"},
  440. {{"\047\216\144", 3}, "J45k"},
  441. {{"\203\070\327", 3}, "gzjX"},
  442. {{"\247\140\072", 3}, "p2A6"},
  443. {{"\124\115\116", 3}, "VE1O"},
  444. {{"\157\162\050", 3}, "b3Io"},
  445. {{"\357\223\004", 3}, "75ME"},
  446. {{"\052\117\156", 3}, "Kk9u"},
  447. {{"\347\154\000", 3}, "52wA"},
  448. {{"\303\012\142", 3}, "wwpi"},
  449. {{"\060\035\362", 3}, "MB3y"},
  450. {{"\130\226\361", 3}, "WJbx"},
  451. {{"\173\013\071", 3}, "ews5"},
  452. {{"\336\004\027", 3}, "3gQX"},
  453. {{"\357\366\234", 3}, "7/ac"},
  454. {{"\353\304\111", 3}, "68RJ"},
  455. {{"\024\264\131", 3}, "FLRZ"},
  456. {{"\075\114\251", 3}, "PUyp"},
  457. {{"\315\031\225", 3}, "zRmV"},
  458. {{"\154\201\276", 3}, "bIG+"},
  459. {{"\200\066\072", 3}, "gDY6"},
  460. {{"\142\350\267", 3}, "Yui3"},
  461. {{"\033\000\166", 3}, "GwB2"},
  462. {{"\210\055\077", 3}, "iC0/"},
  463. {{"\341\037\124", 3}, "4R9U"},
  464. {{"\161\103\152", 3}, "cUNq"},
  465. {{"\270\142\131", 3}, "uGJZ"},
  466. {{"\337\076\074", 3}, "3z48"},
  467. {{"\375\106\362", 3}, "/Uby"},
  468. {{"\227\301\127", 3}, "l8FX"},
  469. {{"\340\002\234", 3}, "4AKc"},
  470. {{"\121\064\033", 3}, "UTQb"},
  471. {{"\157\134\143", 3}, "b1xj"},
  472. {{"\247\055\327", 3}, "py3X"},
  473. {{"\340\142\005", 3}, "4GIF"},
  474. {{"\060\260\143", 3}, "MLBj"},
  475. {{"\075\203\170", 3}, "PYN4"},
  476. {{"\143\160\016", 3}, "Y3AO"},
  477. {{"\313\013\063", 3}, "ywsz"},
  478. {{"\174\236\135", 3}, "fJ5d"},
  479. {{"\103\047\026", 3}, "QycW"},
  480. {{"\365\005\343", 3}, "9QXj"},
  481. {{"\271\160\223", 3}, "uXCT"},
  482. {{"\362\255\172", 3}, "8q16"},
  483. {{"\113\012\015", 3}, "SwoN"},
  484. // various lengths, generated by this python script:
  485. //
  486. // from std::string import lowercase as lc
  487. // for i in range(27):
  488. // print '{ %2d, "%s",%s "%s" },' % (i, lc[:i], ' ' * (26-i),
  489. // lc[:i].encode('base64').strip())
  490. {{"", 0}, {"", 0}},
  491. {"a", "YQ=="},
  492. {"ab", "YWI="},
  493. {"abc", "YWJj"},
  494. {"abcd", "YWJjZA=="},
  495. {"abcde", "YWJjZGU="},
  496. {"abcdef", "YWJjZGVm"},
  497. {"abcdefg", "YWJjZGVmZw=="},
  498. {"abcdefgh", "YWJjZGVmZ2g="},
  499. {"abcdefghi", "YWJjZGVmZ2hp"},
  500. {"abcdefghij", "YWJjZGVmZ2hpag=="},
  501. {"abcdefghijk", "YWJjZGVmZ2hpams="},
  502. {"abcdefghijkl", "YWJjZGVmZ2hpamts"},
  503. {"abcdefghijklm", "YWJjZGVmZ2hpamtsbQ=="},
  504. {"abcdefghijklmn", "YWJjZGVmZ2hpamtsbW4="},
  505. {"abcdefghijklmno", "YWJjZGVmZ2hpamtsbW5v"},
  506. {"abcdefghijklmnop", "YWJjZGVmZ2hpamtsbW5vcA=="},
  507. {"abcdefghijklmnopq", "YWJjZGVmZ2hpamtsbW5vcHE="},
  508. {"abcdefghijklmnopqr", "YWJjZGVmZ2hpamtsbW5vcHFy"},
  509. {"abcdefghijklmnopqrs", "YWJjZGVmZ2hpamtsbW5vcHFycw=="},
  510. {"abcdefghijklmnopqrst", "YWJjZGVmZ2hpamtsbW5vcHFyc3Q="},
  511. {"abcdefghijklmnopqrstu", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1"},
  512. {"abcdefghijklmnopqrstuv", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dg=="},
  513. {"abcdefghijklmnopqrstuvw", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnc="},
  514. {"abcdefghijklmnopqrstuvwx", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4"},
  515. {"abcdefghijklmnopqrstuvwxy", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eQ=="},
  516. {"abcdefghijklmnopqrstuvwxyz", "YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="},
  517. };
  518. template <typename StringType>
  519. void TestEscapeAndUnescape() {
  520. // Check the short strings; this tests the math (and boundaries)
  521. for (const auto& tc : base64_tests) {
  522. StringType encoded("this junk should be ignored");
  523. absl::Base64Escape(tc.plaintext, &encoded);
  524. EXPECT_EQ(encoded, tc.cyphertext);
  525. EXPECT_EQ(absl::Base64Escape(tc.plaintext), tc.cyphertext);
  526. StringType decoded("this junk should be ignored");
  527. EXPECT_TRUE(absl::Base64Unescape(encoded, &decoded));
  528. EXPECT_EQ(decoded, tc.plaintext);
  529. StringType websafe(tc.cyphertext);
  530. for (int c = 0; c < websafe.size(); ++c) {
  531. if ('+' == websafe[c]) websafe[c] = '-';
  532. if ('/' == websafe[c]) websafe[c] = '_';
  533. if ('=' == websafe[c]) {
  534. websafe.resize(c);
  535. break;
  536. }
  537. }
  538. encoded = "this junk should be ignored";
  539. absl::WebSafeBase64Escape(tc.plaintext, &encoded);
  540. EXPECT_EQ(encoded, websafe);
  541. EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), websafe);
  542. // Let's try the string version of the decoder
  543. decoded = "this junk should be ignored";
  544. EXPECT_TRUE(absl::WebSafeBase64Unescape(websafe, &decoded));
  545. EXPECT_EQ(decoded, tc.plaintext);
  546. }
  547. // Now try the long strings, this tests the streaming
  548. for (const auto& tc : absl::strings_internal::base64_strings()) {
  549. StringType buffer;
  550. absl::WebSafeBase64Escape(tc.plaintext, &buffer);
  551. EXPECT_EQ(tc.cyphertext, buffer);
  552. EXPECT_EQ(absl::WebSafeBase64Escape(tc.plaintext), tc.cyphertext);
  553. }
  554. // Verify the behavior when decoding bad data
  555. {
  556. absl::string_view data_set[] = {"ab-/", absl::string_view("\0bcd", 4),
  557. absl::string_view("abc.\0", 5)};
  558. for (absl::string_view bad_data : data_set) {
  559. StringType buf;
  560. EXPECT_FALSE(absl::Base64Unescape(bad_data, &buf));
  561. EXPECT_FALSE(absl::WebSafeBase64Unescape(bad_data, &buf));
  562. EXPECT_TRUE(buf.empty());
  563. }
  564. }
  565. }
// Runs the shared Base64 checks with std::string as the buffer type.
TEST(Base64, EscapeAndUnescape) {
  TestEscapeAndUnescape<std::string>();
}
  569. TEST(Base64, DISABLED_HugeData) {
  570. const size_t kSize = size_t(3) * 1000 * 1000 * 1000;
  571. static_assert(kSize % 3 == 0, "kSize must be divisible by 3");
  572. const std::string huge(kSize, 'x');
  573. std::string escaped;
  574. absl::Base64Escape(huge, &escaped);
  575. // Generates the string that should match a base64 encoded "xxx..." string.
  576. // "xxx" in base64 is "eHh4".
  577. std::string expected_encoding;
  578. expected_encoding.reserve(kSize / 3 * 4);
  579. for (size_t i = 0; i < kSize / 3; ++i) {
  580. expected_encoding.append("eHh4");
  581. }
  582. EXPECT_EQ(expected_encoding, escaped);
  583. std::string unescaped;
  584. EXPECT_TRUE(absl::Base64Unescape(escaped, &unescaped));
  585. EXPECT_EQ(huge, unescaped);
  586. }
  587. TEST(HexAndBack, HexStringToBytes_and_BytesToHexString) {
  588. std::string hex_mixed = "0123456789abcdefABCDEF";
  589. std::string bytes_expected = "\x01\x23\x45\x67\x89\xab\xcd\xef\xAB\xCD\xEF";
  590. std::string hex_only_lower = "0123456789abcdefabcdef";
  591. std::string bytes_result = absl::HexStringToBytes(hex_mixed);
  592. EXPECT_EQ(bytes_expected, bytes_result);
  593. std::string prefix_valid = hex_mixed + "?";
  594. std::string prefix_valid_result = absl::HexStringToBytes(
  595. absl::string_view(prefix_valid.data(), prefix_valid.size() - 1));
  596. EXPECT_EQ(bytes_expected, prefix_valid_result);
  597. std::string infix_valid = "?" + hex_mixed + "???";
  598. std::string infix_valid_result = absl::HexStringToBytes(
  599. absl::string_view(infix_valid.data() + 1, hex_mixed.size()));
  600. EXPECT_EQ(bytes_expected, infix_valid_result);
  601. std::string hex_result = absl::BytesToHexString(bytes_expected);
  602. EXPECT_EQ(hex_only_lower, hex_result);
  603. }
  604. } // namespace