str_split_test.cc 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
#include "absl/strings/str_split.h"

#include <deque>
#include <initializer_list>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "absl/base/dynamic_annotations.h"
#include "absl/base/macros.h"
#include "absl/container/btree_map.h"
#include "absl/container/btree_set.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/node_hash_map.h"
#include "absl/strings/numbers.h"
  34. namespace {
  35. using ::testing::ElementsAre;
  36. using ::testing::Pair;
  37. using ::testing::UnorderedElementsAre;
  38. TEST(Split, TraitsTest) {
  39. static_assert(!absl::strings_internal::SplitterIsConvertibleTo<int>::value,
  40. "");
  41. static_assert(
  42. !absl::strings_internal::SplitterIsConvertibleTo<std::string>::value, "");
  43. static_assert(absl::strings_internal::SplitterIsConvertibleTo<
  44. std::vector<std::string>>::value,
  45. "");
  46. static_assert(
  47. !absl::strings_internal::SplitterIsConvertibleTo<std::vector<int>>::value,
  48. "");
  49. static_assert(absl::strings_internal::SplitterIsConvertibleTo<
  50. std::vector<absl::string_view>>::value,
  51. "");
  52. static_assert(absl::strings_internal::SplitterIsConvertibleTo<
  53. std::map<std::string, std::string>>::value,
  54. "");
  55. static_assert(absl::strings_internal::SplitterIsConvertibleTo<
  56. std::map<absl::string_view, absl::string_view>>::value,
  57. "");
  58. static_assert(!absl::strings_internal::SplitterIsConvertibleTo<
  59. std::map<int, std::string>>::value,
  60. "");
  61. static_assert(!absl::strings_internal::SplitterIsConvertibleTo<
  62. std::map<std::string, int>>::value,
  63. "");
  64. }
  65. // This tests the overall split API, which is made up of the absl::StrSplit()
  66. // function and the Delimiter objects in the absl:: namespace.
  // This TEST macro is outside of the absl namespace, so every absl name must be
  // fully qualified (or explicitly imported with `using`), just like in callers'
  // code.
  69. TEST(Split, APIExamples) {
  70. {
  71. // Passes string delimiter. Assumes the default of ByString.
  72. std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT
  73. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  74. // Equivalent to...
  75. using absl::ByString;
  76. v = absl::StrSplit("a,b,c", ByString(","));
  77. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  78. // Equivalent to...
  79. EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
  80. ElementsAre("a", "b", "c"));
  81. }
  82. {
  83. // Same as above, but using a single character as the delimiter.
  84. std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
  85. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  86. // Equivalent to...
  87. using absl::ByChar;
  88. v = absl::StrSplit("a,b,c", ByChar(','));
  89. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  90. }
  91. {
  92. // Uses the Literal string "=>" as the delimiter.
  93. const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
  94. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  95. }
  96. {
  97. // The substrings are returned as string_views, eliminating copying.
  98. std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
  99. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  100. }
  101. {
  102. // Leading and trailing empty substrings.
  103. std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
  104. EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
  105. }
  106. {
  107. // Splits on a delimiter that is not found.
  108. std::vector<std::string> v = absl::StrSplit("abc", ',');
  109. EXPECT_THAT(v, ElementsAre("abc"));
  110. }
  111. {
  112. // Splits the input string into individual characters by using an empty
  113. // string as the delimiter.
  114. std::vector<std::string> v = absl::StrSplit("abc", "");
  115. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  116. }
  117. {
  118. // Splits string data with embedded NUL characters, using NUL as the
  119. // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
  120. // say that's the empty string when constructing the absl::string_view
  121. // delimiter. Instead, a non-empty string containing NUL can be used as the
  122. // delimiter.
  123. std::string embedded_nulls("a\0b\0c", 5);
  124. std::string null_delim("\0", 1);
  125. std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
  126. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  127. }
  128. {
  129. // Stores first two split strings as the members in a std::pair.
  130. std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
  131. EXPECT_EQ("a", p.first);
  132. EXPECT_EQ("b", p.second);
  133. // "c" is omitted because std::pair can hold only two elements.
  134. }
  135. {
  136. // Results stored in std::set<std::string>
  137. std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
  138. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  139. }
  140. {
  141. // Uses a non-const char* delimiter.
  142. char a[] = ",";
  143. char* d = a + 0;
  144. std::vector<std::string> v = absl::StrSplit("a,b,c", d);
  145. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  146. }
  147. {
  148. // Results split using either of , or ;
  149. using absl::ByAnyChar;
  150. std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
  151. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  152. }
  153. {
  154. // Uses the SkipWhitespace predicate.
  155. using absl::SkipWhitespace;
  156. std::vector<std::string> v =
  157. absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
  158. EXPECT_THAT(v, ElementsAre(" a ", "b"));
  159. }
  160. {
  161. // Uses the ByLength delimiter.
  162. using absl::ByLength;
  163. std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
  164. EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
  165. }
  166. {
  167. // Different forms of initialization / conversion.
  168. std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
  169. EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
  170. std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
  171. EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
  172. auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
  173. EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
  174. v3 = absl::StrSplit("a,b,c", ',');
  175. EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
  176. }
  177. {
  178. // Results stored in a std::map.
  179. std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
  180. EXPECT_EQ(2, m.size());
  181. EXPECT_EQ("3", m["a"]);
  182. EXPECT_EQ("2", m["b"]);
  183. }
  184. {
  185. // Results stored in a std::multimap.
  186. std::multimap<std::string, std::string> m =
  187. absl::StrSplit("a,1,b,2,a,3", ',');
  188. EXPECT_EQ(3, m.size());
  189. auto it = m.find("a");
  190. EXPECT_EQ("1", it->second);
  191. ++it;
  192. EXPECT_EQ("3", it->second);
  193. it = m.find("b");
  194. EXPECT_EQ("2", it->second);
  195. }
  196. {
  197. // Demonstrates use in a range-based for loop in C++11.
  198. std::string s = "x,x,x,x,x,x,x";
  199. for (absl::string_view sp : absl::StrSplit(s, ',')) {
  200. EXPECT_EQ("x", sp);
  201. }
  202. }
  203. {
  204. // Demonstrates use with a Predicate in a range-based for loop.
  205. using absl::SkipWhitespace;
  206. std::string s = " ,x,,x,,x,x,x,,";
  207. for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
  208. EXPECT_EQ("x", sp);
  209. }
  210. }
  211. {
  212. // Demonstrates a "smart" split to std::map using two separate calls to
  213. // absl::StrSplit. One call to split the records, and another call to split
  214. // the keys and values. This also uses the Limit delimiter so that the
  215. // std::string "a=b=c" will split to "a" -> "b=c".
  216. std::map<std::string, std::string> m;
  217. for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
  218. m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
  219. }
  220. EXPECT_EQ("b=c", m.find("a")->second);
  221. EXPECT_EQ("e", m.find("d")->second);
  222. EXPECT_EQ("", m.find("f")->second);
  223. EXPECT_EQ("", m.find("g")->second);
  224. }
  225. }
  226. //
  227. // Tests for SplitIterator
  228. //
  229. TEST(SplitIterator, Basics) {
  230. auto splitter = absl::StrSplit("a,b", ',');
  231. auto it = splitter.begin();
  232. auto end = splitter.end();
  233. EXPECT_NE(it, end);
  234. EXPECT_EQ("a", *it); // tests dereference
  235. ++it; // tests preincrement
  236. EXPECT_NE(it, end);
  237. EXPECT_EQ("b",
  238. std::string(it->data(), it->size())); // tests dereference as ptr
  239. it++; // tests postincrement
  240. EXPECT_EQ(it, end);
  241. }
  242. // Simple Predicate to skip a particular string.
  243. class Skip {
  244. public:
  245. explicit Skip(const std::string& s) : s_(s) {}
  246. bool operator()(absl::string_view sp) { return sp != s_; }
  247. private:
  248. std::string s_;
  249. };
  250. TEST(SplitIterator, Predicate) {
  251. auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  252. auto it = splitter.begin();
  253. auto end = splitter.end();
  254. EXPECT_NE(it, end);
  255. EXPECT_EQ("a", *it); // tests dereference
  256. ++it; // tests preincrement -- "b" should be skipped here.
  257. EXPECT_NE(it, end);
  258. EXPECT_EQ("c",
  259. std::string(it->data(), it->size())); // tests dereference as ptr
  260. it++; // tests postincrement
  261. EXPECT_EQ(it, end);
  262. }
  263. TEST(SplitIterator, EdgeCases) {
  264. // Expected input and output, assuming a delimiter of ','
  265. struct {
  266. std::string in;
  267. std::vector<std::string> expect;
  268. } specs[] = {
  269. {"", {""}},
  270. {"foo", {"foo"}},
  271. {",", {"", ""}},
  272. {",foo", {"", "foo"}},
  273. {"foo,", {"foo", ""}},
  274. {",foo,", {"", "foo", ""}},
  275. {"foo,bar", {"foo", "bar"}},
  276. };
  277. for (const auto& spec : specs) {
  278. SCOPED_TRACE(spec.in);
  279. auto splitter = absl::StrSplit(spec.in, ',');
  280. auto it = splitter.begin();
  281. auto end = splitter.end();
  282. for (const auto& expected : spec.expect) {
  283. EXPECT_NE(it, end);
  284. EXPECT_EQ(expected, *it++);
  285. }
  286. EXPECT_EQ(it, end);
  287. }
  288. }
  289. TEST(Splitter, Const) {
  290. const auto splitter = absl::StrSplit("a,b,c", ',');
  291. EXPECT_THAT(splitter, ElementsAre("a", "b", "c"));
  292. }
  293. TEST(Split, EmptyAndNull) {
  294. // Attention: Splitting a null absl::string_view is different than splitting
  295. // an empty absl::string_view even though both string_views are considered
  296. // equal. This behavior is likely surprising and undesirable. However, to
  297. // maintain backward compatibility, there is a small "hack" in
  298. // str_split_internal.h that preserves this behavior. If that behavior is ever
  299. // changed/fixed, this test will need to be updated.
  300. EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre(""));
  301. EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre());
  302. }
  303. TEST(SplitIterator, EqualityAsEndCondition) {
  304. auto splitter = absl::StrSplit("a,b,c", ',');
  305. auto it = splitter.begin();
  306. auto it2 = it;
  307. // Increments it2 twice to point to "c" in the input text.
  308. ++it2;
  309. ++it2;
  310. EXPECT_EQ("c", *it2);
  311. // This test uses a non-end SplitIterator as the terminating condition in a
  312. // for loop. This relies on SplitIterator equality for non-end SplitIterators
  313. // working correctly. At this point it2 points to "c", and we use that as the
  314. // "end" condition in this test.
  315. std::vector<absl::string_view> v;
  316. for (; it != it2; ++it) {
  317. v.push_back(*it);
  318. }
  319. EXPECT_THAT(v, ElementsAre("a", "b"));
  320. }
  321. //
  322. // Tests for Splitter
  323. //
  324. TEST(Splitter, RangeIterators) {
  325. auto splitter = absl::StrSplit("a,b,c", ',');
  326. std::vector<absl::string_view> output;
  327. for (const absl::string_view& p : splitter) {
  328. output.push_back(p);
  329. }
  330. EXPECT_THAT(output, ElementsAre("a", "b", "c"));
  331. }
  332. // Some template functions for use in testing conversion operators
  333. template <typename ContainerType, typename Splitter>
  334. void TestConversionOperator(const Splitter& splitter) {
  335. ContainerType output = splitter;
  336. EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
  337. }
  338. template <typename MapType, typename Splitter>
  339. void TestMapConversionOperator(const Splitter& splitter) {
  340. MapType m = splitter;
  341. EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
  342. }
  // Implicitly converts `splitter` to std::pair<FirstType, SecondType> and
  // checks that the pair equals ("a", "b").
  template <typename FirstType, typename SecondType, typename Splitter>
  void TestPairConversionOperator(const Splitter& splitter) {
    using PairType = std::pair<FirstType, SecondType>;
    // Copy-initialization on purpose: this is the implicit conversion under test.
    PairType converted = splitter;
    EXPECT_EQ(converted, PairType("a", "b"));
  }
  348. TEST(Splitter, ConversionOperator) {
  349. auto splitter = absl::StrSplit("a,b,c,d", ',');
  350. TestConversionOperator<std::vector<absl::string_view>>(splitter);
  351. TestConversionOperator<std::vector<std::string>>(splitter);
  352. TestConversionOperator<std::list<absl::string_view>>(splitter);
  353. TestConversionOperator<std::list<std::string>>(splitter);
  354. TestConversionOperator<std::deque<absl::string_view>>(splitter);
  355. TestConversionOperator<std::deque<std::string>>(splitter);
  356. TestConversionOperator<std::set<absl::string_view>>(splitter);
  357. TestConversionOperator<std::set<std::string>>(splitter);
  358. TestConversionOperator<std::multiset<absl::string_view>>(splitter);
  359. TestConversionOperator<std::multiset<std::string>>(splitter);
  360. TestConversionOperator<absl::btree_set<absl::string_view>>(splitter);
  361. TestConversionOperator<absl::btree_set<std::string>>(splitter);
  362. TestConversionOperator<absl::btree_multiset<absl::string_view>>(splitter);
  363. TestConversionOperator<absl::btree_multiset<std::string>>(splitter);
  364. TestConversionOperator<std::unordered_set<std::string>>(splitter);
  365. // Tests conversion to map-like objects.
  366. TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>(
  367. splitter);
  368. TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter);
  369. TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter);
  370. TestMapConversionOperator<std::map<std::string, std::string>>(splitter);
  371. TestMapConversionOperator<
  372. std::multimap<absl::string_view, absl::string_view>>(splitter);
  373. TestMapConversionOperator<std::multimap<absl::string_view, std::string>>(
  374. splitter);
  375. TestMapConversionOperator<std::multimap<std::string, absl::string_view>>(
  376. splitter);
  377. TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter);
  378. TestMapConversionOperator<
  379. absl::btree_map<absl::string_view, absl::string_view>>(splitter);
  380. TestMapConversionOperator<absl::btree_map<absl::string_view, std::string>>(
  381. splitter);
  382. TestMapConversionOperator<absl::btree_map<std::string, absl::string_view>>(
  383. splitter);
  384. TestMapConversionOperator<absl::btree_map<std::string, std::string>>(
  385. splitter);
  386. TestMapConversionOperator<
  387. absl::btree_multimap<absl::string_view, absl::string_view>>(splitter);
  388. TestMapConversionOperator<
  389. absl::btree_multimap<absl::string_view, std::string>>(splitter);
  390. TestMapConversionOperator<
  391. absl::btree_multimap<std::string, absl::string_view>>(splitter);
  392. TestMapConversionOperator<absl::btree_multimap<std::string, std::string>>(
  393. splitter);
  394. TestMapConversionOperator<std::unordered_map<std::string, std::string>>(
  395. splitter);
  396. TestMapConversionOperator<
  397. absl::node_hash_map<absl::string_view, absl::string_view>>(splitter);
  398. TestMapConversionOperator<
  399. absl::node_hash_map<absl::string_view, std::string>>(splitter);
  400. TestMapConversionOperator<
  401. absl::node_hash_map<std::string, absl::string_view>>(splitter);
  402. TestMapConversionOperator<
  403. absl::flat_hash_map<absl::string_view, absl::string_view>>(splitter);
  404. TestMapConversionOperator<
  405. absl::flat_hash_map<absl::string_view, std::string>>(splitter);
  406. TestMapConversionOperator<
  407. absl::flat_hash_map<std::string, absl::string_view>>(splitter);
  408. // Tests conversion to std::pair
  409. TestPairConversionOperator<absl::string_view, absl::string_view>(splitter);
  410. TestPairConversionOperator<absl::string_view, std::string>(splitter);
  411. TestPairConversionOperator<std::string, absl::string_view>(splitter);
  412. TestPairConversionOperator<std::string, std::string>(splitter);
  413. }
  414. // A few additional tests for conversion to std::pair. This conversion is
  415. // different from others because a std::pair always has exactly two elements:
  416. // .first and .second. The split has to work even when the split has
  417. // less-than, equal-to, and more-than 2 strings.
  418. TEST(Splitter, ToPair) {
  419. {
  420. // Empty string
  421. std::pair<std::string, std::string> p = absl::StrSplit("", ',');
  422. EXPECT_EQ("", p.first);
  423. EXPECT_EQ("", p.second);
  424. }
  425. {
  426. // Only first
  427. std::pair<std::string, std::string> p = absl::StrSplit("a", ',');
  428. EXPECT_EQ("a", p.first);
  429. EXPECT_EQ("", p.second);
  430. }
  431. {
  432. // Only second
  433. std::pair<std::string, std::string> p = absl::StrSplit(",b", ',');
  434. EXPECT_EQ("", p.first);
  435. EXPECT_EQ("b", p.second);
  436. }
  437. {
  438. // First and second.
  439. std::pair<std::string, std::string> p = absl::StrSplit("a,b", ',');
  440. EXPECT_EQ("a", p.first);
  441. EXPECT_EQ("b", p.second);
  442. }
  443. {
  444. // First and second and then more stuff that will be ignored.
  445. std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
  446. EXPECT_EQ("a", p.first);
  447. EXPECT_EQ("b", p.second);
  448. // "c" is omitted.
  449. }
  450. }
  451. TEST(Splitter, Predicates) {
  452. static const char kTestChars[] = ",a, ,b,";
  453. using absl::AllowEmpty;
  454. using absl::SkipEmpty;
  455. using absl::SkipWhitespace;
  456. {
  457. // No predicate. Does not skip empties.
  458. auto splitter = absl::StrSplit(kTestChars, ',');
  459. std::vector<std::string> v = splitter;
  460. EXPECT_THAT(v, ElementsAre("", "a", " ", "b", ""));
  461. }
  462. {
  463. // Allows empty strings. Same behavior as no predicate at all.
  464. auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty());
  465. std::vector<std::string> v_allowempty = splitter;
  466. EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", ""));
  467. // Ensures AllowEmpty equals the behavior with no predicate.
  468. auto splitter_nopredicate = absl::StrSplit(kTestChars, ',');
  469. std::vector<std::string> v_nopredicate = splitter_nopredicate;
  470. EXPECT_EQ(v_allowempty, v_nopredicate);
  471. }
  472. {
  473. // Skips empty strings.
  474. auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty());
  475. std::vector<std::string> v = splitter;
  476. EXPECT_THAT(v, ElementsAre("a", " ", "b"));
  477. }
  478. {
  479. // Skips empty and all-whitespace strings.
  480. auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace());
  481. std::vector<std::string> v = splitter;
  482. EXPECT_THAT(v, ElementsAre("a", "b"));
  483. }
  484. }
  485. //
  486. // Tests for StrSplit()
  487. //
  488. TEST(Split, Basics) {
  489. {
  490. // Doesn't really do anything useful because the return value is ignored,
  491. // but it should work.
  492. absl::StrSplit("a,b,c", ',');
  493. }
  494. {
  495. std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
  496. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  497. }
  498. {
  499. std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
  500. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  501. }
  502. {
  503. // Ensures that assignment works. This requires a little extra work with
  504. // C++11 because of overloads with initializer_list.
  505. std::vector<std::string> v;
  506. v = absl::StrSplit("a,b,c", ',');
  507. EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  508. std::map<std::string, std::string> m;
  509. m = absl::StrSplit("a,b,c", ',');
  510. EXPECT_EQ(2, m.size());
  511. std::unordered_map<std::string, std::string> hm;
  512. hm = absl::StrSplit("a,b,c", ',');
  513. EXPECT_EQ(2, hm.size());
  514. }
  515. }
  516. absl::string_view ReturnStringView() { return "Hello World"; }
  // Returns a const pointer to the string literal "Hello World".
  const char* ReturnConstCharP() {
    const char* const greeting = "Hello World";
    return greeting;
  }
  // Returns a non-const pointer to the string literal "Hello World". The
  // const_cast only changes the pointer's type; the literal itself must still
  // not be written to.
  char* ReturnCharP() {
    const char* greeting = "Hello World";
    return const_cast<char*>(greeting);
  }
  519. TEST(Split, AcceptsCertainTemporaries) {
  520. std::vector<std::string> v;
  521. v = absl::StrSplit(ReturnStringView(), ' ');
  522. EXPECT_THAT(v, ElementsAre("Hello", "World"));
  523. v = absl::StrSplit(ReturnConstCharP(), ' ');
  524. EXPECT_THAT(v, ElementsAre("Hello", "World"));
  525. v = absl::StrSplit(ReturnCharP(), ' ');
  526. EXPECT_THAT(v, ElementsAre("Hello", "World"));
  527. }
  528. TEST(Split, Temporary) {
  529. // Use a std::string longer than the SSO length, so that when the temporary is
  530. // destroyed, if the splitter keeps a reference to the string's contents,
  531. // it'll reference freed memory instead of just dead on-stack memory.
  532. const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  533. EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
  534. << "Input should be larger than fits on the stack.";
  535. // This happens more often in C++11 as part of a range-based for loop.
  536. auto splitter = absl::StrSplit(std::string(input), ',');
  537. std::string expected = "a";
  538. for (absl::string_view letter : splitter) {
  539. EXPECT_EQ(expected, letter);
  540. ++expected[0];
  541. }
  542. EXPECT_EQ("v", expected);
  543. // This happens more often in C++11 as part of a range-based for loop.
  544. auto std_splitter = absl::StrSplit(std::string(input), ',');
  545. expected = "a";
  546. for (absl::string_view letter : std_splitter) {
  547. EXPECT_EQ(expected, letter);
  548. ++expected[0];
  549. }
  550. EXPECT_EQ("v", expected);
  551. }
  // Copy-constructs `value` into a new heap object and returns the owning
  // unique_ptr.
  template <typename T>
  static std::unique_ptr<T> CopyToHeap(const T& value) {
    std::unique_ptr<T> heap_copy(new T(value));
    return heap_copy;
  }
  556. TEST(Split, LvalueCaptureIsCopyable) {
  557. std::string input = "a,b";
  558. auto heap_splitter = CopyToHeap(absl::StrSplit(input, ','));
  559. auto stack_splitter = *heap_splitter;
  560. heap_splitter.reset();
  561. std::vector<std::string> result = stack_splitter;
  562. EXPECT_THAT(result, testing::ElementsAre("a", "b"));
  563. }
  564. TEST(Split, TemporaryCaptureIsCopyable) {
  565. auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  566. auto stack_splitter = *heap_splitter;
  567. heap_splitter.reset();
  568. std::vector<std::string> result = stack_splitter;
  569. EXPECT_THAT(result, testing::ElementsAre("a", "b"));
  570. }
  571. TEST(Split, SplitterIsCopyableAndMoveable) {
  572. auto a = absl::StrSplit("foo", '-');
  573. // Ensures that the following expressions compile.
  574. auto b = a; // Copy construct
  575. auto c = std::move(a); // Move construct
  576. b = c; // Copy assign
  577. c = std::move(b); // Move assign
  578. EXPECT_THAT(c, ElementsAre("foo"));
  579. }
  580. TEST(Split, StringDelimiter) {
  581. {
  582. std::vector<absl::string_view> v = absl::StrSplit("a,b", ',');
  583. EXPECT_THAT(v, ElementsAre("a", "b"));
  584. }
  585. {
  586. std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(","));
  587. EXPECT_THAT(v, ElementsAre("a", "b"));
  588. }
  589. {
  590. std::vector<absl::string_view> v =
  591. absl::StrSplit("a,b", absl::string_view(","));
  592. EXPECT_THAT(v, ElementsAre("a", "b"));
  593. }
  594. }
  595. #if !defined(__cpp_char8_t)
  596. #if defined(__clang__)
  597. #pragma clang diagnostic push
  598. #pragma clang diagnostic ignored "-Wc++2a-compat"
  599. #endif
  600. TEST(Split, UTF8) {
  601. // Tests splitting utf8 strings and utf8 delimiters.
  602. std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";
  603. {
  604. // A utf8 input string with an ascii delimiter.
  605. std::string to_split = "a," + utf8_string;
  606. std::vector<absl::string_view> v = absl::StrSplit(to_split, ',');
  607. EXPECT_THAT(v, ElementsAre("a", utf8_string));
  608. }
  609. {
  610. // A utf8 input string and a utf8 delimiter.
  611. std::string to_split = "a," + utf8_string + ",b";
  612. std::string unicode_delimiter = "," + utf8_string + ",";
  613. std::vector<absl::string_view> v =
  614. absl::StrSplit(to_split, unicode_delimiter);
  615. EXPECT_THAT(v, ElementsAre("a", "b"));
  616. }
  617. {
  618. // A utf8 input string and ByAnyChar with ascii chars.
  619. std::vector<absl::string_view> v =
  620. absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
  621. EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  622. }
  623. }
  624. #if defined(__clang__)
  625. #pragma clang diagnostic pop
  626. #endif
  627. #endif // !defined(__cpp_char8_t)
  628. TEST(Split, EmptyStringDelimiter) {
  629. {
  630. std::vector<std::string> v = absl::StrSplit("", "");
  631. EXPECT_THAT(v, ElementsAre(""));
  632. }
  633. {
  634. std::vector<std::string> v = absl::StrSplit("a", "");
  635. EXPECT_THAT(v, ElementsAre("a"));
  636. }
  637. {
  638. std::vector<std::string> v = absl::StrSplit("ab", "");
  639. EXPECT_THAT(v, ElementsAre("a", "b"));
  640. }
  641. {
  642. std::vector<std::string> v = absl::StrSplit("a b", "");
  643. EXPECT_THAT(v, ElementsAre("a", " ", "b"));
  644. }
  645. }
  646. TEST(Split, SubstrDelimiter) {
  647. std::vector<absl::string_view> results;
  648. absl::string_view delim("//");
  649. results = absl::StrSplit("", delim);
  650. EXPECT_THAT(results, ElementsAre(""));
  651. results = absl::StrSplit("//", delim);
  652. EXPECT_THAT(results, ElementsAre("", ""));
  653. results = absl::StrSplit("ab", delim);
  654. EXPECT_THAT(results, ElementsAre("ab"));
  655. results = absl::StrSplit("ab//", delim);
  656. EXPECT_THAT(results, ElementsAre("ab", ""));
  657. results = absl::StrSplit("ab/", delim);
  658. EXPECT_THAT(results, ElementsAre("ab/"));
  659. results = absl::StrSplit("a/b", delim);
  660. EXPECT_THAT(results, ElementsAre("a/b"));
  661. results = absl::StrSplit("a//b", delim);
  662. EXPECT_THAT(results, ElementsAre("a", "b"));
  663. results = absl::StrSplit("a///b", delim);
  664. EXPECT_THAT(results, ElementsAre("a", "/b"));
  665. results = absl::StrSplit("a////b", delim);
  666. EXPECT_THAT(results, ElementsAre("a", "", "b"));
  667. }
  668. TEST(Split, EmptyResults) {
  669. std::vector<absl::string_view> results;
  670. results = absl::StrSplit("", '#');
  671. EXPECT_THAT(results, ElementsAre(""));
  672. results = absl::StrSplit("#", '#');
  673. EXPECT_THAT(results, ElementsAre("", ""));
  674. results = absl::StrSplit("#cd", '#');
  675. EXPECT_THAT(results, ElementsAre("", "cd"));
  676. results = absl::StrSplit("ab#cd#", '#');
  677. EXPECT_THAT(results, ElementsAre("ab", "cd", ""));
  678. results = absl::StrSplit("ab##cd", '#');
  679. EXPECT_THAT(results, ElementsAre("ab", "", "cd"));
  680. results = absl::StrSplit("ab##", '#');
  681. EXPECT_THAT(results, ElementsAre("ab", "", ""));
  682. results = absl::StrSplit("ab#ab#", '#');
  683. EXPECT_THAT(results, ElementsAre("ab", "ab", ""));
  684. results = absl::StrSplit("aaaa", 'a');
  685. EXPECT_THAT(results, ElementsAre("", "", "", "", ""));
  686. results = absl::StrSplit("", '#', absl::SkipEmpty());
  687. EXPECT_THAT(results, ElementsAre());
  688. }
  689. template <typename Delimiter>
  690. static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
  691. size_t starting_pos, int expected_pos) {
  692. absl::string_view found = d.Find(text, starting_pos);
  693. return found.data() != text.data() + text.size() &&
  694. expected_pos == found.data() - text.data();
  695. }
  696. // Helper function for testing Delimiter objects. Returns true if the given
  697. // Delimiter is found in the given string at the given position. This function
  698. // tests two cases:
  699. // 1. The actual text given, staring at position 0
  700. // 2. The text given with leading padding that should be ignored
  701. template <typename Delimiter>
  702. static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
  703. const std::string leading_text = ",x,y,z,";
  704. return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
  705. IsFoundAtStartingPos(leading_text + std::string(text), d,
  706. leading_text.length(),
  707. expected_pos + leading_text.length());
  708. }
  709. //
  710. // Tests for ByString
  711. //
  712. // Tests using any delimiter that represents a single comma.
  713. template <typename Delimiter>
  714. void TestComma(Delimiter d) {
  715. EXPECT_TRUE(IsFoundAt(",", d, 0));
  716. EXPECT_TRUE(IsFoundAt("a,", d, 1));
  717. EXPECT_TRUE(IsFoundAt(",b", d, 0));
  718. EXPECT_TRUE(IsFoundAt("a,b", d, 1));
  719. EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
  720. EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
  721. EXPECT_FALSE(IsFoundAt("", d, -1));
  722. EXPECT_FALSE(IsFoundAt(" ", d, -1));
  723. EXPECT_FALSE(IsFoundAt("a", d, -1));
  724. EXPECT_FALSE(IsFoundAt("a b c", d, -1));
  725. EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
  726. EXPECT_FALSE(IsFoundAt(";", d, -1));
  727. }
  728. TEST(Delimiter, ByString) {
  729. using absl::ByString;
  730. TestComma(ByString(","));
  731. // Works as named variable.
  732. ByString comma_string(",");
  733. TestComma(comma_string);
  734. // The first occurrence of empty string ("") in a string is at position 0.
  735. // There is a test below that demonstrates this for absl::string_view::find().
  736. // If the ByString delimiter returned position 0 for this, there would
  737. // be an infinite loop in the SplitIterator code. To avoid this, empty string
  738. // is a special case in that it always returns the item at position 1.
  739. absl::string_view abc("abc");
  740. EXPECT_EQ(0, abc.find("")); // "" is found at position 0
  741. ByString empty("");
  742. EXPECT_FALSE(IsFoundAt("", empty, 0));
  743. EXPECT_FALSE(IsFoundAt("a", empty, 0));
  744. EXPECT_TRUE(IsFoundAt("ab", empty, 1));
  745. EXPECT_TRUE(IsFoundAt("abc", empty, 1));
  746. }
  747. TEST(Split, ByChar) {
  748. using absl::ByChar;
  749. TestComma(ByChar(','));
  750. // Works as named variable.
  751. ByChar comma_char(',');
  752. TestComma(comma_char);
  753. }
  754. //
  755. // Tests for ByAnyChar
  756. //
  757. TEST(Delimiter, ByAnyChar) {
  758. using absl::ByAnyChar;
  759. ByAnyChar one_delim(",");
  760. // Found
  761. EXPECT_TRUE(IsFoundAt(",", one_delim, 0));
  762. EXPECT_TRUE(IsFoundAt("a,", one_delim, 1));
  763. EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1));
  764. EXPECT_TRUE(IsFoundAt(",b", one_delim, 0));
  765. // Not found
  766. EXPECT_FALSE(IsFoundAt("", one_delim, -1));
  767. EXPECT_FALSE(IsFoundAt(" ", one_delim, -1));
  768. EXPECT_FALSE(IsFoundAt("a", one_delim, -1));
  769. EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1));
  770. EXPECT_FALSE(IsFoundAt(";", one_delim, -1));
  771. ByAnyChar two_delims(",;");
  772. // Found
  773. EXPECT_TRUE(IsFoundAt(",", two_delims, 0));
  774. EXPECT_TRUE(IsFoundAt(";", two_delims, 0));
  775. EXPECT_TRUE(IsFoundAt(",;", two_delims, 0));
  776. EXPECT_TRUE(IsFoundAt(";,", two_delims, 0));
  777. EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0));
  778. EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0));
  779. EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1));
  780. EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1));
  781. EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1));
  782. EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1));
  783. // Not found
  784. EXPECT_FALSE(IsFoundAt("", two_delims, -1));
  785. EXPECT_FALSE(IsFoundAt(" ", two_delims, -1));
  786. EXPECT_FALSE(IsFoundAt("a", two_delims, -1));
  787. EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1));
  788. EXPECT_FALSE(IsFoundAt("=", two_delims, -1));
  789. // ByAnyChar behaves just like ByString when given a delimiter of empty
  790. // string. That is, it always returns a zero-length absl::string_view
  791. // referring to the item at position 1, not position 0.
  792. ByAnyChar empty("");
  793. EXPECT_FALSE(IsFoundAt("", empty, 0));
  794. EXPECT_FALSE(IsFoundAt("a", empty, 0));
  795. EXPECT_TRUE(IsFoundAt("ab", empty, 1));
  796. EXPECT_TRUE(IsFoundAt("abc", empty, 1));
  797. }
  798. //
  799. // Tests for ByLength
  800. //
  801. TEST(Delimiter, ByLength) {
  802. using absl::ByLength;
  803. ByLength four_char_delim(4);
  804. // Found
  805. EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4));
  806. EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4));
  807. EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4));
  808. // Not found
  809. EXPECT_FALSE(IsFoundAt("", four_char_delim, 0));
  810. EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0));
  811. EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0));
  812. EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0));
  813. EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0));
  814. }
  815. TEST(Split, WorksWithLargeStrings) {
  816. #if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
  817. defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
  818. constexpr size_t kSize = (uint32_t{1} << 26) + 1; // 64M + 1 byte
  819. #else
  820. constexpr size_t kSize = (uint32_t{1} << 31) + 1; // 2G + 1 byte
  821. #endif
  822. if (sizeof(size_t) > 4) {
  823. std::string s(kSize, 'x');
  824. s.back() = '-';
  825. std::vector<absl::string_view> v = absl::StrSplit(s, '-');
  826. EXPECT_EQ(2, v.size());
  827. // The first element will contain 2G of 'x's.
  828. // testing::StartsWith is too slow with a 2G string.
  829. EXPECT_EQ('x', v[0][0]);
  830. EXPECT_EQ('x', v[0][1]);
  831. EXPECT_EQ('x', v[0][3]);
  832. EXPECT_EQ("", v[1]);
  833. }
  834. }
  835. TEST(SplitInternalTest, TypeTraits) {
  836. EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value);
  837. EXPECT_TRUE(
  838. (absl::strings_internal::HasMappedType<std::map<int, int>>::value));
  839. EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value);
  840. EXPECT_TRUE(
  841. (absl::strings_internal::HasValueType<std::map<int, int>>::value));
  842. EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value);
  843. EXPECT_TRUE(
  844. (absl::strings_internal::HasConstIterator<std::map<int, int>>::value));
  845. EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value);
  846. EXPECT_TRUE((absl::strings_internal::IsInitializerList<
  847. std::initializer_list<int>>::value));
  848. }
  849. } // namespace