parser.cc 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "absl/strings/internal/str_format/parser.h"
  15. #include <assert.h>
  16. #include <string.h>
  17. #include <wchar.h>
  18. #include <cctype>
  19. #include <cstdint>
  20. #include <algorithm>
  21. #include <initializer_list>
  22. #include <limits>
  23. #include <ostream>
  24. #include <string>
  25. #include <unordered_set>
  26. namespace absl {
  27. ABSL_NAMESPACE_BEGIN
  28. namespace str_format_internal {
// Short aliases so that the rows of the kTags table below stay compact.
using CC = FormatConversionCharInternal;
using LM = LengthMod;

// Abbreviations to fit in the table below. Each one is placed in kTags at the
// index of the flag character noted next to it.
constexpr auto f_sign = Flags::kSignCol;  // ' '
constexpr auto f_alt = Flags::kAlt;       // '#'
constexpr auto f_pos = Flags::kShowPos;   // '+'
constexpr auto f_left = Flags::kLeft;     // '-'
constexpr auto f_zero = Flags::kZero;     // '0'
// Lookup table with one ConvTag per possible byte value (256 entries). The
// entry at index `ch` is a flag (f_*), a conversion character (CC::*) or a
// length modifier (LM::*) when `ch` has a meaning inside a conversion
// specification, and an empty `{}` entry otherwise. The trailing comment on
// each row lists the eight characters (or the hex index range) the row covers.
// NOTE(review): presumably this is the table consulted by GetTagForChar(),
// which is declared outside this file -- confirm in parser.h.
ABSL_CONST_INIT const ConvTag kTags[256] = {
    {},     {},    {},    {},    {},    {},     {},    {},     // 00-07
    {},     {},    {},    {},    {},    {},     {},    {},     // 08-0f
    {},     {},    {},    {},    {},    {},     {},    {},     // 10-17
    {},     {},    {},    {},    {},    {},     {},    {},     // 18-1f
    f_sign, {},    {},    f_alt, {},    {},     {},    {},     // (space)!"#$%&'
    {},     {},    {},    f_pos, {},    f_left, {},    {},     // ()*+,-./
    f_zero, {},    {},    {},    {},    {},     {},    {},     // 01234567
    {},     {},    {},    {},    {},    {},     {},    {},     // 89:;<=>?
    {},     CC::A, {},    {},    {},    CC::E,  CC::F, CC::G,  // @ABCDEFG
    {},     {},    {},    {},    LM::L, {},     {},    {},     // HIJKLMNO
    {},     {},    {},    {},    {},    {},     {},    {},     // PQRSTUVW
    CC::X,  {},    {},    {},    {},    {},     {},    {},     // XYZ[\]^_
    {},     CC::a, {},    CC::c, CC::d, CC::e,  CC::f, CC::g,  // `abcdefg
    LM::h,  CC::i, LM::j, {},    LM::l, {},     CC::n, CC::o,  // hijklmno
    CC::p,  LM::q, {},    CC::s, LM::t, CC::u,  {},    {},     // pqrstuvw
    CC::x,  {},    LM::z, {},    {},    {},     {},    {},     // xyz{|}~ DEL
    {},     {},    {},    {},    {},    {},     {},    {},     // 80-87
    {},     {},    {},    {},    {},    {},     {},    {},     // 88-8f
    {},     {},    {},    {},    {},    {},     {},    {},     // 90-97
    {},     {},    {},    {},    {},    {},     {},    {},     // 98-9f
    {},     {},    {},    {},    {},    {},     {},    {},     // a0-a7
    {},     {},    {},    {},    {},    {},     {},    {},     // a8-af
    {},     {},    {},    {},    {},    {},     {},    {},     // b0-b7
    {},     {},    {},    {},    {},    {},     {},    {},     // b8-bf
    {},     {},    {},    {},    {},    {},     {},    {},     // c0-c7
    {},     {},    {},    {},    {},    {},     {},    {},     // c8-cf
    {},     {},    {},    {},    {},    {},     {},    {},     // d0-d7
    {},     {},    {},    {},    {},    {},     {},    {},     // d8-df
    {},     {},    {},    {},    {},    {},     {},    {},     // e0-e7
    {},     {},    {},    {},    {},    {},     {},    {},     // e8-ef
    {},     {},    {},    {},    {},    {},     {},    {},     // f0-f7
    {},     {},    {},    {},    {},    {},     {},    {},     // f8-ff
};
  71. namespace {
  72. bool CheckFastPathSetting(const UnboundConversion& conv) {
  73. bool width_precision_needed =
  74. conv.width.value() >= 0 || conv.precision.value() >= 0;
  75. if (width_precision_needed && conv.flags == Flags::kBasic) {
  76. fprintf(stderr,
  77. "basic=%d left=%d show_pos=%d sign_col=%d alt=%d zero=%d "
  78. "width=%d precision=%d\n",
  79. conv.flags == Flags::kBasic ? 1 : 0,
  80. FlagsContains(conv.flags, Flags::kLeft) ? 1 : 0,
  81. FlagsContains(conv.flags, Flags::kShowPos) ? 1 : 0,
  82. FlagsContains(conv.flags, Flags::kSignCol) ? 1 : 0,
  83. FlagsContains(conv.flags, Flags::kAlt) ? 1 : 0,
  84. FlagsContains(conv.flags, Flags::kZero) ? 1 : 0, conv.width.value(),
  85. conv.precision.value());
  86. return false;
  87. }
  88. return true;
  89. }
  90. template <bool is_positional>
  91. const char *ConsumeConversion(const char *pos, const char *const end,
  92. UnboundConversion *conv, int *next_arg) {
  93. const char* const original_pos = pos;
  94. char c;
  95. // Read the next char into `c` and update `pos`. Returns false if there are
  96. // no more chars to read.
  97. #define ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR() \
  98. do { \
  99. if (ABSL_PREDICT_FALSE(pos == end)) return nullptr; \
  100. c = *pos++; \
  101. } while (0)
  102. const auto parse_digits = [&] {
  103. int digits = c - '0';
  104. // We do not want to overflow `digits` so we consume at most digits10
  105. // digits. If there are more digits the parsing will fail later on when the
  106. // digit doesn't match the expected characters.
  107. int num_digits = std::numeric_limits<int>::digits10;
  108. for (;;) {
  109. if (ABSL_PREDICT_FALSE(pos == end)) break;
  110. c = *pos++;
  111. if (!std::isdigit(c)) break;
  112. --num_digits;
  113. if (ABSL_PREDICT_FALSE(!num_digits)) break;
  114. digits = 10 * digits + c - '0';
  115. }
  116. return digits;
  117. };
  118. if (is_positional) {
  119. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  120. if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
  121. conv->arg_position = parse_digits();
  122. assert(conv->arg_position > 0);
  123. if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  124. }
  125. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  126. // We should start with the basic flag on.
  127. assert(conv->flags == Flags::kBasic);
  128. // Any non alpha character makes this conversion not basic.
  129. // This includes flags (-+ #0), width (1-9, *) or precision (.).
  130. // All conversion characters and length modifiers are alpha characters.
  131. if (c < 'A') {
  132. while (c <= '0') {
  133. auto tag = GetTagForChar(c);
  134. if (tag.is_flags()) {
  135. conv->flags = conv->flags | tag.as_flags();
  136. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  137. } else {
  138. break;
  139. }
  140. }
  141. if (c <= '9') {
  142. if (c >= '0') {
  143. int maybe_width = parse_digits();
  144. if (!is_positional && c == '$') {
  145. if (ABSL_PREDICT_FALSE(*next_arg != 0)) return nullptr;
  146. // Positional conversion.
  147. *next_arg = -1;
  148. return ConsumeConversion<true>(original_pos, end, conv, next_arg);
  149. }
  150. conv->flags = conv->flags | Flags::kNonBasic;
  151. conv->width.set_value(maybe_width);
  152. } else if (c == '*') {
  153. conv->flags = conv->flags | Flags::kNonBasic;
  154. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  155. if (is_positional) {
  156. if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
  157. conv->width.set_from_arg(parse_digits());
  158. if (ABSL_PREDICT_FALSE(c != '$')) return nullptr;
  159. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  160. } else {
  161. conv->width.set_from_arg(++*next_arg);
  162. }
  163. }
  164. }
  165. if (c == '.') {
  166. conv->flags = conv->flags | Flags::kNonBasic;
  167. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  168. if (std::isdigit(c)) {
  169. conv->precision.set_value(parse_digits());
  170. } else if (c == '*') {
  171. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  172. if (is_positional) {
  173. if (ABSL_PREDICT_FALSE(c < '1' || c > '9')) return nullptr;
  174. conv->precision.set_from_arg(parse_digits());
  175. if (c != '$') return nullptr;
  176. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  177. } else {
  178. conv->precision.set_from_arg(++*next_arg);
  179. }
  180. } else {
  181. conv->precision.set_value(0);
  182. }
  183. }
  184. }
  185. auto tag = GetTagForChar(c);
  186. if (ABSL_PREDICT_FALSE(!tag.is_conv())) {
  187. if (ABSL_PREDICT_FALSE(!tag.is_length())) return nullptr;
  188. // It is a length modifier.
  189. using str_format_internal::LengthMod;
  190. LengthMod length_mod = tag.as_length();
  191. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  192. if (c == 'h' && length_mod == LengthMod::h) {
  193. conv->length_mod = LengthMod::hh;
  194. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  195. } else if (c == 'l' && length_mod == LengthMod::l) {
  196. conv->length_mod = LengthMod::ll;
  197. ABSL_FORMAT_PARSER_INTERNAL_GET_CHAR();
  198. } else {
  199. conv->length_mod = length_mod;
  200. }
  201. tag = GetTagForChar(c);
  202. if (ABSL_PREDICT_FALSE(!tag.is_conv())) return nullptr;
  203. }
  204. assert(CheckFastPathSetting(*conv));
  205. (void)(&CheckFastPathSetting);
  206. conv->conv = tag.as_conv();
  207. if (!is_positional) conv->arg_position = ++*next_arg;
  208. return pos;
  209. }
  210. } // namespace
  211. std::string LengthModToString(LengthMod v) {
  212. switch (v) {
  213. case LengthMod::h:
  214. return "h";
  215. case LengthMod::hh:
  216. return "hh";
  217. case LengthMod::l:
  218. return "l";
  219. case LengthMod::ll:
  220. return "ll";
  221. case LengthMod::L:
  222. return "L";
  223. case LengthMod::j:
  224. return "j";
  225. case LengthMod::z:
  226. return "z";
  227. case LengthMod::t:
  228. return "t";
  229. case LengthMod::q:
  230. return "q";
  231. case LengthMod::none:
  232. return "";
  233. }
  234. return "";
  235. }
  236. const char *ConsumeUnboundConversion(const char *p, const char *end,
  237. UnboundConversion *conv, int *next_arg) {
  238. if (*next_arg < 0) return ConsumeConversion<true>(p, end, conv, next_arg);
  239. return ConsumeConversion<false>(p, end, conv, next_arg);
  240. }
  241. struct ParsedFormatBase::ParsedFormatConsumer {
  242. explicit ParsedFormatConsumer(ParsedFormatBase *parsedformat)
  243. : parsed(parsedformat), data_pos(parsedformat->data_.get()) {}
  244. bool Append(string_view s) {
  245. if (s.empty()) return true;
  246. size_t text_end = AppendText(s);
  247. if (!parsed->items_.empty() && !parsed->items_.back().is_conversion) {
  248. // Let's extend the existing text run.
  249. parsed->items_.back().text_end = text_end;
  250. } else {
  251. // Let's make a new text run.
  252. parsed->items_.push_back({false, text_end, {}});
  253. }
  254. return true;
  255. }
  256. bool ConvertOne(const UnboundConversion &conv, string_view s) {
  257. size_t text_end = AppendText(s);
  258. parsed->items_.push_back({true, text_end, conv});
  259. return true;
  260. }
  261. size_t AppendText(string_view s) {
  262. memcpy(data_pos, s.data(), s.size());
  263. data_pos += s.size();
  264. return static_cast<size_t>(data_pos - parsed->data_.get());
  265. }
  266. ParsedFormatBase *parsed;
  267. char* data_pos;
  268. };
  269. ParsedFormatBase::ParsedFormatBase(
  270. string_view format, bool allow_ignored,
  271. std::initializer_list<FormatConversionCharSet> convs)
  272. : data_(format.empty() ? nullptr : new char[format.size()]) {
  273. has_error_ = !ParseFormatString(format, ParsedFormatConsumer(this)) ||
  274. !MatchesConversions(allow_ignored, convs);
  275. }
  276. bool ParsedFormatBase::MatchesConversions(
  277. bool allow_ignored,
  278. std::initializer_list<FormatConversionCharSet> convs) const {
  279. std::unordered_set<int> used;
  280. auto add_if_valid_conv = [&](int pos, char c) {
  281. if (static_cast<size_t>(pos) > convs.size() ||
  282. !Contains(convs.begin()[pos - 1], c))
  283. return false;
  284. used.insert(pos);
  285. return true;
  286. };
  287. for (const ConversionItem &item : items_) {
  288. if (!item.is_conversion) continue;
  289. auto &conv = item.conv;
  290. if (conv.precision.is_from_arg() &&
  291. !add_if_valid_conv(conv.precision.get_from_arg(), '*'))
  292. return false;
  293. if (conv.width.is_from_arg() &&
  294. !add_if_valid_conv(conv.width.get_from_arg(), '*'))
  295. return false;
  296. if (!add_if_valid_conv(conv.arg_position,
  297. FormatConversionCharToChar(conv.conv)))
  298. return false;
  299. }
  300. return used.size() == convs.size() || allow_ignored;
  301. }
  302. } // namespace str_format_internal
  303. ABSL_NAMESPACE_END
  304. } // namespace absl