parser.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. // Copyright 2020 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
  15. #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
  16. #include <limits.h>
  17. #include <stddef.h>
  18. #include <stdlib.h>
  19. #include <cassert>
  20. #include <cstdint>
  21. #include <initializer_list>
  22. #include <iosfwd>
  23. #include <iterator>
  24. #include <memory>
  25. #include <string>
  26. #include <vector>
  27. #include "absl/strings/internal/str_format/checker.h"
  28. #include "absl/strings/internal/str_format/extension.h"
  29. namespace absl {
  30. ABSL_NAMESPACE_BEGIN
  31. namespace str_format_internal {
  32. enum class LengthMod : std::uint8_t { h, hh, l, ll, L, j, z, t, q, none };
  33. std::string LengthModToString(LengthMod v);
  34. // The analyzed properties of a single specified conversion.
  35. struct UnboundConversion {
  36. UnboundConversion() {}
  37. class InputValue {
  38. public:
  39. void set_value(int value) {
  40. assert(value >= 0);
  41. value_ = value;
  42. }
  43. int value() const { return value_; }
  44. // Marks the value as "from arg". aka the '*' format.
  45. // Requires `value >= 1`.
  46. // When set, is_from_arg() return true and get_from_arg() returns the
  47. // original value.
  48. // `value()`'s return value is unspecfied in this state.
  49. void set_from_arg(int value) {
  50. assert(value > 0);
  51. value_ = -value - 1;
  52. }
  53. bool is_from_arg() const { return value_ < -1; }
  54. int get_from_arg() const {
  55. assert(is_from_arg());
  56. return -value_ - 1;
  57. }
  58. private:
  59. int value_ = -1;
  60. };
  61. // No need to initialize. It will always be set in the parser.
  62. int arg_position;
  63. InputValue width;
  64. InputValue precision;
  65. Flags flags = Flags::kBasic;
  66. LengthMod length_mod = LengthMod::none;
  67. FormatConversionChar conv = FormatConversionCharInternal::kNone;
  68. };
  69. // Consume conversion spec prefix (not including '%') of [p, end) if valid.
  70. // Examples of valid specs would be e.g.: "s", "d", "-12.6f".
  71. // If valid, it returns the first character following the conversion spec,
  72. // and the spec part is broken down and returned in 'conv'.
  73. // If invalid, returns nullptr.
  74. const char* ConsumeUnboundConversion(const char* p, const char* end,
  75. UnboundConversion* conv, int* next_arg);
  76. // Helper tag class for the table below.
  77. // It allows fast `char -> ConversionChar/LengthMod/Flags` checking and
  78. // conversions.
  79. class ConvTag {
  80. public:
  81. constexpr ConvTag(FormatConversionChar conversion_char) // NOLINT
  82. : tag_(static_cast<uint8_t>(conversion_char)) {}
  83. constexpr ConvTag(LengthMod length_mod) // NOLINT
  84. : tag_(0x80 | static_cast<uint8_t>(length_mod)) {}
  85. constexpr ConvTag(Flags flags) // NOLINT
  86. : tag_(0xc0 | static_cast<uint8_t>(flags)) {}
  87. constexpr ConvTag() : tag_(0xFF) {}
  88. bool is_conv() const { return (tag_ & 0x80) == 0; }
  89. bool is_length() const { return (tag_ & 0xC0) == 0x80; }
  90. bool is_flags() const { return (tag_ & 0xE0) == 0xC0; }
  91. FormatConversionChar as_conv() const {
  92. assert(is_conv());
  93. assert(!is_length());
  94. assert(!is_flags());
  95. return static_cast<FormatConversionChar>(tag_);
  96. }
  97. LengthMod as_length() const {
  98. assert(!is_conv());
  99. assert(is_length());
  100. assert(!is_flags());
  101. return static_cast<LengthMod>(tag_ & 0x3F);
  102. }
  103. Flags as_flags() const {
  104. assert(!is_conv());
  105. assert(!is_length());
  106. assert(is_flags());
  107. return static_cast<Flags>(tag_ & 0x1F);
  108. }
  109. private:
  110. uint8_t tag_;
  111. };
  112. extern const ConvTag kTags[256];
  113. // Keep a single table for all the conversion chars and length modifiers.
  114. inline ConvTag GetTagForChar(char c) {
  115. return kTags[static_cast<unsigned char>(c)];
  116. }
  117. // Parse the format string provided in 'src' and pass the identified items into
  118. // 'consumer'.
  119. // Text runs will be passed by calling
  120. // Consumer::Append(string_view);
  121. // ConversionItems will be passed by calling
  122. // Consumer::ConvertOne(UnboundConversion, string_view);
  123. // In the case of ConvertOne, the string_view that is passed is the
  124. // portion of the format string corresponding to the conversion, not including
  125. // the leading %. On success, it returns true. On failure, it stops and returns
  126. // false.
  127. template <typename Consumer>
  128. bool ParseFormatString(string_view src, Consumer consumer) {
  129. int next_arg = 0;
  130. const char* p = src.data();
  131. const char* const end = p + src.size();
  132. while (p != end) {
  133. const char* percent = static_cast<const char*>(memchr(p, '%', end - p));
  134. if (!percent) {
  135. // We found the last substring.
  136. return consumer.Append(string_view(p, end - p));
  137. }
  138. // We found a percent, so push the text run then process the percent.
  139. if (ABSL_PREDICT_FALSE(!consumer.Append(string_view(p, percent - p)))) {
  140. return false;
  141. }
  142. if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
  143. auto tag = GetTagForChar(percent[1]);
  144. if (tag.is_conv()) {
  145. if (ABSL_PREDICT_FALSE(next_arg < 0)) {
  146. // This indicates an error in the format string.
  147. // The only way to get `next_arg < 0` here is to have a positional
  148. // argument first which sets next_arg to -1 and then a non-positional
  149. // argument.
  150. return false;
  151. }
  152. p = percent + 2;
  153. // Keep this case separate from the one below.
  154. // ConvertOne is more efficient when the compiler can see that the `basic`
  155. // flag is set.
  156. UnboundConversion conv;
  157. conv.conv = tag.as_conv();
  158. conv.arg_position = ++next_arg;
  159. if (ABSL_PREDICT_FALSE(
  160. !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
  161. return false;
  162. }
  163. } else if (percent[1] != '%') {
  164. UnboundConversion conv;
  165. p = ConsumeUnboundConversion(percent + 1, end, &conv, &next_arg);
  166. if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
  167. if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
  168. conv, string_view(percent + 1, p - (percent + 1))))) {
  169. return false;
  170. }
  171. } else {
  172. if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
  173. p = percent + 2;
  174. continue;
  175. }
  176. }
  177. return true;
  178. }
  179. // Always returns true, or fails to compile in a constexpr context if s does not
  180. // point to a constexpr char array.
  181. constexpr bool EnsureConstexpr(string_view s) {
  182. return s.empty() || s[0] == s[0];
  183. }
  184. class ParsedFormatBase {
  185. public:
  186. explicit ParsedFormatBase(
  187. string_view format, bool allow_ignored,
  188. std::initializer_list<FormatConversionCharSet> convs);
  189. ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
  190. ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
  191. ParsedFormatBase& operator=(const ParsedFormatBase& other) {
  192. if (this == &other) return *this;
  193. has_error_ = other.has_error_;
  194. items_ = other.items_;
  195. size_t text_size = items_.empty() ? 0 : items_.back().text_end;
  196. data_.reset(new char[text_size]);
  197. memcpy(data_.get(), other.data_.get(), text_size);
  198. return *this;
  199. }
  200. ParsedFormatBase& operator=(ParsedFormatBase&& other) {
  201. if (this == &other) return *this;
  202. has_error_ = other.has_error_;
  203. data_ = std::move(other.data_);
  204. items_ = std::move(other.items_);
  205. // Reset the vector to make sure the invariants hold.
  206. other.items_.clear();
  207. return *this;
  208. }
  209. template <typename Consumer>
  210. bool ProcessFormat(Consumer consumer) const {
  211. const char* const base = data_.get();
  212. string_view text(base, 0);
  213. for (const auto& item : items_) {
  214. const char* const end = text.data() + text.size();
  215. text = string_view(end, (base + item.text_end) - end);
  216. if (item.is_conversion) {
  217. if (!consumer.ConvertOne(item.conv, text)) return false;
  218. } else {
  219. if (!consumer.Append(text)) return false;
  220. }
  221. }
  222. return !has_error_;
  223. }
  224. bool has_error() const { return has_error_; }
  225. private:
  226. // Returns whether the conversions match and if !allow_ignored it verifies
  227. // that all conversions are used by the format.
  228. bool MatchesConversions(
  229. bool allow_ignored,
  230. std::initializer_list<FormatConversionCharSet> convs) const;
  231. struct ParsedFormatConsumer;
  232. struct ConversionItem {
  233. bool is_conversion;
  234. // Points to the past-the-end location of this element in the data_ array.
  235. size_t text_end;
  236. UnboundConversion conv;
  237. };
  238. bool has_error_;
  239. std::unique_ptr<char[]> data_;
  240. std::vector<ConversionItem> items_;
  241. };
  242. // A value type representing a preparsed format. These can be created, copied
  243. // around, and reused to speed up formatting loops.
  244. // The user must specify through the template arguments the conversion
  245. // characters used in the format. This will be checked at compile time.
  246. //
  247. // This class uses Conv enum values to specify each argument.
  248. // This allows for more flexibility as you can specify multiple possible
  249. // conversion characters for each argument.
  250. // ParsedFormat<char...> is a simplified alias for when the user only
  251. // needs to specify a single conversion character for each argument.
  252. //
  253. // Example:
  254. // // Extended format supports multiple characters per argument:
  255. // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
  256. // MyFormat GetFormat(bool use_hex) {
  257. // if (use_hex) return MyFormat("foo %x bar");
  258. // return MyFormat("foo %d bar");
  259. // }
  260. // // 'format' can be used with any value that supports 'd' and 'x',
  261. // // like `int`.
  262. // auto format = GetFormat(use_hex);
  263. // value = StringF(format, i);
  264. //
  265. // This class also supports runtime format checking with the ::New() and
  266. // ::NewAllowIgnored() factory functions.
  267. // This is the only API that allows the user to pass a runtime specified format
  268. // string. These factory functions will return NULL if the format does not match
  269. // the conversions requested by the user.
  270. template <FormatConversionCharSet... C>
  271. class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
  272. public:
  273. explicit ExtendedParsedFormat(string_view format)
  274. #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
  275. __attribute__((
  276. enable_if(str_format_internal::EnsureConstexpr(format),
  277. "Format string is not constexpr."),
  278. enable_if(str_format_internal::ValidFormatImpl<C...>(format),
  279. "Format specified does not match the template arguments.")))
  280. #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
  281. : ExtendedParsedFormat(format, false) {
  282. }
  283. // ExtendedParsedFormat factory function.
  284. // The user still has to specify the conversion characters, but they will not
  285. // be checked at compile time. Instead, it will be checked at runtime.
  286. // This delays the checking to runtime, but allows the user to pass
  287. // dynamically sourced formats.
  288. // It returns NULL if the format does not match the conversion characters.
  289. // The user is responsible for checking the return value before using it.
  290. //
  291. // The 'New' variant will check that all the specified arguments are being
  292. // consumed by the format and return NULL if any argument is being ignored.
  293. // The 'NewAllowIgnored' variant will not verify this and will allow formats
  294. // that ignore arguments.
  295. static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
  296. return New(format, false);
  297. }
  298. static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
  299. string_view format) {
  300. return New(format, true);
  301. }
  302. private:
  303. static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
  304. bool allow_ignored) {
  305. std::unique_ptr<ExtendedParsedFormat> conv(
  306. new ExtendedParsedFormat(format, allow_ignored));
  307. if (conv->has_error()) return nullptr;
  308. return conv;
  309. }
  310. ExtendedParsedFormat(string_view s, bool allow_ignored)
  311. : ParsedFormatBase(s, allow_ignored, {C...}) {}
  312. };
  313. } // namespace str_format_internal
  314. ABSL_NAMESPACE_END
  315. } // namespace absl
  316. #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_