demangle.cc 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959
  1. // Copyright 2018 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // For reference check out:
  15. // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
  16. //
  17. // Note that we only have partial C++11 support yet.
  18. #include "absl/debugging/internal/demangle.h"
  19. #include <cstdint>
  20. #include <cstdio>
  21. #include <limits>
  22. namespace absl {
  23. ABSL_NAMESPACE_BEGIN
  24. namespace debugging_internal {
  25. typedef struct {
  26. const char *abbrev;
  27. const char *real_name;
  28. // Number of arguments in <expression> context, or 0 if disallowed.
  29. int arity;
  30. } AbbrevPair;
  31. // List of operators from Itanium C++ ABI.
  32. static const AbbrevPair kOperatorList[] = {
  33. // New has special syntax (not currently supported).
  34. {"nw", "new", 0},
  35. {"na", "new[]", 0},
  36. // Works except that the 'gs' prefix is not supported.
  37. {"dl", "delete", 1},
  38. {"da", "delete[]", 1},
  39. {"ps", "+", 1}, // "positive"
  40. {"ng", "-", 1}, // "negative"
  41. {"ad", "&", 1}, // "address-of"
  42. {"de", "*", 1}, // "dereference"
  43. {"co", "~", 1},
  44. {"pl", "+", 2},
  45. {"mi", "-", 2},
  46. {"ml", "*", 2},
  47. {"dv", "/", 2},
  48. {"rm", "%", 2},
  49. {"an", "&", 2},
  50. {"or", "|", 2},
  51. {"eo", "^", 2},
  52. {"aS", "=", 2},
  53. {"pL", "+=", 2},
  54. {"mI", "-=", 2},
  55. {"mL", "*=", 2},
  56. {"dV", "/=", 2},
  57. {"rM", "%=", 2},
  58. {"aN", "&=", 2},
  59. {"oR", "|=", 2},
  60. {"eO", "^=", 2},
  61. {"ls", "<<", 2},
  62. {"rs", ">>", 2},
  63. {"lS", "<<=", 2},
  64. {"rS", ">>=", 2},
  65. {"eq", "==", 2},
  66. {"ne", "!=", 2},
  67. {"lt", "<", 2},
  68. {"gt", ">", 2},
  69. {"le", "<=", 2},
  70. {"ge", ">=", 2},
  71. {"nt", "!", 1},
  72. {"aa", "&&", 2},
  73. {"oo", "||", 2},
  74. {"pp", "++", 1},
  75. {"mm", "--", 1},
  76. {"cm", ",", 2},
  77. {"pm", "->*", 2},
  78. {"pt", "->", 0}, // Special syntax
  79. {"cl", "()", 0}, // Special syntax
  80. {"ix", "[]", 2},
  81. {"qu", "?", 3},
  82. {"st", "sizeof", 0}, // Special syntax
  83. {"sz", "sizeof", 1}, // Not a real operator name, but used in expressions.
  84. {nullptr, nullptr, 0},
  85. };
  86. // List of builtin types from Itanium C++ ABI.
  87. //
  88. // Invariant: only one- or two-character type abbreviations here.
  89. static const AbbrevPair kBuiltinTypeList[] = {
  90. {"v", "void", 0},
  91. {"w", "wchar_t", 0},
  92. {"b", "bool", 0},
  93. {"c", "char", 0},
  94. {"a", "signed char", 0},
  95. {"h", "unsigned char", 0},
  96. {"s", "short", 0},
  97. {"t", "unsigned short", 0},
  98. {"i", "int", 0},
  99. {"j", "unsigned int", 0},
  100. {"l", "long", 0},
  101. {"m", "unsigned long", 0},
  102. {"x", "long long", 0},
  103. {"y", "unsigned long long", 0},
  104. {"n", "__int128", 0},
  105. {"o", "unsigned __int128", 0},
  106. {"f", "float", 0},
  107. {"d", "double", 0},
  108. {"e", "long double", 0},
  109. {"g", "__float128", 0},
  110. {"z", "ellipsis", 0},
  111. {"De", "decimal128", 0}, // IEEE 754r decimal floating point (128 bits)
  112. {"Dd", "decimal64", 0}, // IEEE 754r decimal floating point (64 bits)
  113. {"Dc", "decltype(auto)", 0},
  114. {"Da", "auto", 0},
  115. {"Dn", "std::nullptr_t", 0}, // i.e., decltype(nullptr)
  116. {"Df", "decimal32", 0}, // IEEE 754r decimal floating point (32 bits)
  117. {"Di", "char32_t", 0},
  118. {"Du", "char8_t", 0},
  119. {"Ds", "char16_t", 0},
  120. {"Dh", "float16", 0}, // IEEE 754r half-precision float (16 bits)
  121. {nullptr, nullptr, 0},
  122. };
  123. // List of substitutions Itanium C++ ABI.
  124. static const AbbrevPair kSubstitutionList[] = {
  125. {"St", "", 0},
  126. {"Sa", "allocator", 0},
  127. {"Sb", "basic_string", 0},
  128. // std::basic_string<char, std::char_traits<char>,std::allocator<char> >
  129. {"Ss", "string", 0},
  130. // std::basic_istream<char, std::char_traits<char> >
  131. {"Si", "istream", 0},
  132. // std::basic_ostream<char, std::char_traits<char> >
  133. {"So", "ostream", 0},
  134. // std::basic_iostream<char, std::char_traits<char> >
  135. {"Sd", "iostream", 0},
  136. {nullptr, nullptr, 0},
  137. };
  138. // State needed for demangling. This struct is copied in almost every stack
  139. // frame, so every byte counts.
  140. typedef struct {
  141. int mangled_idx; // Cursor of mangled name.
  142. int out_cur_idx; // Cursor of output string.
  143. int prev_name_idx; // For constructors/destructors.
  144. signed int prev_name_length : 16; // For constructors/destructors.
  145. signed int nest_level : 15; // For nested names.
  146. unsigned int append : 1; // Append flag.
  147. // Note: for some reason MSVC can't pack "bool append : 1" into the same int
  148. // with the above two fields, so we use an int instead. Amusingly it can pack
  149. // "signed bool" as expected, but relying on that to continue to be a legal
  150. // type seems ill-advised (as it's illegal in at least clang).
  151. } ParseState;
  152. static_assert(sizeof(ParseState) == 4 * sizeof(int),
  153. "unexpected size of ParseState");
  154. // One-off state for demangling that's not subject to backtracking -- either
  155. // constant data, data that's intentionally immune to backtracking (steps), or
  156. // data that would never be changed by backtracking anyway (recursion_depth).
  157. //
  158. // Only one copy of this exists for each call to Demangle, so the size of this
  159. // struct is nearly inconsequential.
  160. typedef struct {
  161. const char *mangled_begin; // Beginning of input string.
  162. char *out; // Beginning of output string.
  163. int out_end_idx; // One past last allowed output character.
  164. int recursion_depth; // For stack exhaustion prevention.
  165. int steps; // Cap how much work we'll do, regardless of depth.
  166. ParseState parse_state; // Backtrackable state copied for most frames.
  167. } State;
  168. namespace {
  169. // Prevent deep recursion / stack exhaustion.
  170. // Also prevent unbounded handling of complex inputs.
  171. class ComplexityGuard {
  172. public:
  173. explicit ComplexityGuard(State *state) : state_(state) {
  174. ++state->recursion_depth;
  175. ++state->steps;
  176. }
  177. ~ComplexityGuard() { --state_->recursion_depth; }
  178. // 256 levels of recursion seems like a reasonable upper limit on depth.
  179. // 128 is not enough to demagle synthetic tests from demangle_unittest.txt:
  180. // "_ZaaZZZZ..." and "_ZaaZcvZcvZ..."
  181. static constexpr int kRecursionDepthLimit = 256;
  182. // We're trying to pick a charitable upper-limit on how many parse steps are
  183. // necessary to handle something that a human could actually make use of.
  184. // This is mostly in place as a bound on how much work we'll do if we are
  185. // asked to demangle an mangled name from an untrusted source, so it should be
  186. // much larger than the largest expected symbol, but much smaller than the
  187. // amount of work we can do in, e.g., a second.
  188. //
  189. // Some real-world symbols from an arbitrary binary started failing between
  190. // 2^12 and 2^13, so we multiply the latter by an extra factor of 16 to set
  191. // the limit.
  192. //
  193. // Spending one second on 2^17 parse steps would require each step to take
  194. // 7.6us, or ~30000 clock cycles, so it's safe to say this can be done in
  195. // under a second.
  196. static constexpr int kParseStepsLimit = 1 << 17;
  197. bool IsTooComplex() const {
  198. return state_->recursion_depth > kRecursionDepthLimit ||
  199. state_->steps > kParseStepsLimit;
  200. }
  201. private:
  202. State *state_;
  203. };
  204. } // namespace
  205. // We don't use strlen() in libc since it's not guaranteed to be async
  206. // signal safe.
  207. static size_t StrLen(const char *str) {
  208. size_t len = 0;
  209. while (*str != '\0') {
  210. ++str;
  211. ++len;
  212. }
  213. return len;
  214. }
  215. // Returns true if "str" has at least "n" characters remaining.
  216. static bool AtLeastNumCharsRemaining(const char *str, int n) {
  217. for (int i = 0; i < n; ++i) {
  218. if (str[i] == '\0') {
  219. return false;
  220. }
  221. }
  222. return true;
  223. }
  224. // Returns true if "str" has "prefix" as a prefix.
  225. static bool StrPrefix(const char *str, const char *prefix) {
  226. size_t i = 0;
  227. while (str[i] != '\0' && prefix[i] != '\0' && str[i] == prefix[i]) {
  228. ++i;
  229. }
  230. return prefix[i] == '\0'; // Consumed everything in "prefix".
  231. }
  232. static void InitState(State *state, const char *mangled, char *out,
  233. int out_size) {
  234. state->mangled_begin = mangled;
  235. state->out = out;
  236. state->out_end_idx = out_size;
  237. state->recursion_depth = 0;
  238. state->steps = 0;
  239. state->parse_state.mangled_idx = 0;
  240. state->parse_state.out_cur_idx = 0;
  241. state->parse_state.prev_name_idx = 0;
  242. state->parse_state.prev_name_length = -1;
  243. state->parse_state.nest_level = -1;
  244. state->parse_state.append = true;
  245. }
  246. static inline const char *RemainingInput(State *state) {
  247. return &state->mangled_begin[state->parse_state.mangled_idx];
  248. }
  249. // Returns true and advances "mangled_idx" if we find "one_char_token"
  250. // at "mangled_idx" position. It is assumed that "one_char_token" does
  251. // not contain '\0'.
  252. static bool ParseOneCharToken(State *state, const char one_char_token) {
  253. ComplexityGuard guard(state);
  254. if (guard.IsTooComplex()) return false;
  255. if (RemainingInput(state)[0] == one_char_token) {
  256. ++state->parse_state.mangled_idx;
  257. return true;
  258. }
  259. return false;
  260. }
  261. // Returns true and advances "mangled_cur" if we find "two_char_token"
  262. // at "mangled_cur" position. It is assumed that "two_char_token" does
  263. // not contain '\0'.
  264. static bool ParseTwoCharToken(State *state, const char *two_char_token) {
  265. ComplexityGuard guard(state);
  266. if (guard.IsTooComplex()) return false;
  267. if (RemainingInput(state)[0] == two_char_token[0] &&
  268. RemainingInput(state)[1] == two_char_token[1]) {
  269. state->parse_state.mangled_idx += 2;
  270. return true;
  271. }
  272. return false;
  273. }
  274. // Returns true and advances "mangled_cur" if we find any character in
  275. // "char_class" at "mangled_cur" position.
  276. static bool ParseCharClass(State *state, const char *char_class) {
  277. ComplexityGuard guard(state);
  278. if (guard.IsTooComplex()) return false;
  279. if (RemainingInput(state)[0] == '\0') {
  280. return false;
  281. }
  282. const char *p = char_class;
  283. for (; *p != '\0'; ++p) {
  284. if (RemainingInput(state)[0] == *p) {
  285. ++state->parse_state.mangled_idx;
  286. return true;
  287. }
  288. }
  289. return false;
  290. }
  291. static bool ParseDigit(State *state, int *digit) {
  292. char c = RemainingInput(state)[0];
  293. if (ParseCharClass(state, "0123456789")) {
  294. if (digit != nullptr) {
  295. *digit = c - '0';
  296. }
  297. return true;
  298. }
  299. return false;
  300. }
  301. // This function is used for handling an optional non-terminal.
  302. static bool Optional(bool /*status*/) { return true; }
  303. // This function is used for handling <non-terminal>+ syntax.
  304. typedef bool (*ParseFunc)(State *);
  305. static bool OneOrMore(ParseFunc parse_func, State *state) {
  306. if (parse_func(state)) {
  307. while (parse_func(state)) {
  308. }
  309. return true;
  310. }
  311. return false;
  312. }
  313. // This function is used for handling <non-terminal>* syntax. The function
  314. // always returns true and must be followed by a termination token or a
  315. // terminating sequence not handled by parse_func (e.g.
  316. // ParseOneCharToken(state, 'E')).
  317. static bool ZeroOrMore(ParseFunc parse_func, State *state) {
  318. while (parse_func(state)) {
  319. }
  320. return true;
  321. }
  322. // Append "str" at "out_cur_idx". If there is an overflow, out_cur_idx is
  323. // set to out_end_idx+1. The output string is ensured to
  324. // always terminate with '\0' as long as there is no overflow.
  325. static void Append(State *state, const char *const str, const int length) {
  326. for (int i = 0; i < length; ++i) {
  327. if (state->parse_state.out_cur_idx + 1 <
  328. state->out_end_idx) { // +1 for '\0'
  329. state->out[state->parse_state.out_cur_idx++] = str[i];
  330. } else {
  331. // signal overflow
  332. state->parse_state.out_cur_idx = state->out_end_idx + 1;
  333. break;
  334. }
  335. }
  336. if (state->parse_state.out_cur_idx < state->out_end_idx) {
  337. state->out[state->parse_state.out_cur_idx] =
  338. '\0'; // Terminate it with '\0'
  339. }
  340. }
  341. // We don't use equivalents in libc to avoid locale issues.
  342. static bool IsLower(char c) { return c >= 'a' && c <= 'z'; }
  343. static bool IsAlpha(char c) {
  344. return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
  345. }
  346. static bool IsDigit(char c) { return c >= '0' && c <= '9'; }
  347. // Returns true if "str" is a function clone suffix. These suffixes are used
  348. // by GCC 4.5.x and later versions (and our locally-modified version of GCC
  349. // 4.4.x) to indicate functions which have been cloned during optimization.
  350. // We treat any sequence (.<alpha>+.<digit>+)+ as a function clone suffix.
  351. // Additionally, '_' is allowed along with the alphanumeric sequence.
  352. static bool IsFunctionCloneSuffix(const char *str) {
  353. size_t i = 0;
  354. while (str[i] != '\0') {
  355. bool parsed = false;
  356. // Consume a single [.<alpha> | _]*[.<digit>]* sequence.
  357. if (str[i] == '.' && (IsAlpha(str[i + 1]) || str[i + 1] == '_')) {
  358. parsed = true;
  359. i += 2;
  360. while (IsAlpha(str[i]) || str[i] == '_') {
  361. ++i;
  362. }
  363. }
  364. if (str[i] == '.' && IsDigit(str[i + 1])) {
  365. parsed = true;
  366. i += 2;
  367. while (IsDigit(str[i])) {
  368. ++i;
  369. }
  370. }
  371. if (!parsed)
  372. return false;
  373. }
  374. return true; // Consumed everything in "str".
  375. }
  376. static bool EndsWith(State *state, const char chr) {
  377. return state->parse_state.out_cur_idx > 0 &&
  378. state->parse_state.out_cur_idx < state->out_end_idx &&
  379. chr == state->out[state->parse_state.out_cur_idx - 1];
  380. }
  381. // Append "str" with some tweaks, iff "append" state is true.
  382. static void MaybeAppendWithLength(State *state, const char *const str,
  383. const int length) {
  384. if (state->parse_state.append && length > 0) {
  385. // Append a space if the output buffer ends with '<' and "str"
  386. // starts with '<' to avoid <<<.
  387. if (str[0] == '<' && EndsWith(state, '<')) {
  388. Append(state, " ", 1);
  389. }
  390. // Remember the last identifier name for ctors/dtors,
  391. // but only if we haven't yet overflown the buffer.
  392. if (state->parse_state.out_cur_idx < state->out_end_idx &&
  393. (IsAlpha(str[0]) || str[0] == '_')) {
  394. state->parse_state.prev_name_idx = state->parse_state.out_cur_idx;
  395. state->parse_state.prev_name_length = length;
  396. }
  397. Append(state, str, length);
  398. }
  399. }
  400. // Appends a positive decimal number to the output if appending is enabled.
  401. static bool MaybeAppendDecimal(State *state, unsigned int val) {
  402. // Max {32-64}-bit unsigned int is 20 digits.
  403. constexpr size_t kMaxLength = 20;
  404. char buf[kMaxLength];
  405. // We can't use itoa or sprintf as neither is specified to be
  406. // async-signal-safe.
  407. if (state->parse_state.append) {
  408. // We can't have a one-before-the-beginning pointer, so instead start with
  409. // one-past-the-end and manipulate one character before the pointer.
  410. char *p = &buf[kMaxLength];
  411. do { // val=0 is the only input that should write a leading zero digit.
  412. *--p = (val % 10) + '0';
  413. val /= 10;
  414. } while (p > buf && val != 0);
  415. // 'p' landed on the last character we set. How convenient.
  416. Append(state, p, kMaxLength - (p - buf));
  417. }
  418. return true;
  419. }
  420. // A convenient wrapper around MaybeAppendWithLength().
  421. // Returns true so that it can be placed in "if" conditions.
  422. static bool MaybeAppend(State *state, const char *const str) {
  423. if (state->parse_state.append) {
  424. int length = StrLen(str);
  425. MaybeAppendWithLength(state, str, length);
  426. }
  427. return true;
  428. }
  429. // This function is used for handling nested names.
  430. static bool EnterNestedName(State *state) {
  431. state->parse_state.nest_level = 0;
  432. return true;
  433. }
  434. // This function is used for handling nested names.
  435. static bool LeaveNestedName(State *state, int16_t prev_value) {
  436. state->parse_state.nest_level = prev_value;
  437. return true;
  438. }
  439. // Disable the append mode not to print function parameters, etc.
  440. static bool DisableAppend(State *state) {
  441. state->parse_state.append = false;
  442. return true;
  443. }
  444. // Restore the append mode to the previous state.
  445. static bool RestoreAppend(State *state, bool prev_value) {
  446. state->parse_state.append = prev_value;
  447. return true;
  448. }
  449. // Increase the nest level for nested names.
  450. static void MaybeIncreaseNestLevel(State *state) {
  451. if (state->parse_state.nest_level > -1) {
  452. ++state->parse_state.nest_level;
  453. }
  454. }
  455. // Appends :: for nested names if necessary.
  456. static void MaybeAppendSeparator(State *state) {
  457. if (state->parse_state.nest_level >= 1) {
  458. MaybeAppend(state, "::");
  459. }
  460. }
  461. // Cancel the last separator if necessary.
  462. static void MaybeCancelLastSeparator(State *state) {
  463. if (state->parse_state.nest_level >= 1 && state->parse_state.append &&
  464. state->parse_state.out_cur_idx >= 2) {
  465. state->parse_state.out_cur_idx -= 2;
  466. state->out[state->parse_state.out_cur_idx] = '\0';
  467. }
  468. }
  469. // Returns true if the identifier of the given length pointed to by
  470. // "mangled_cur" is anonymous namespace.
  471. static bool IdentifierIsAnonymousNamespace(State *state, int length) {
  472. // Returns true if "anon_prefix" is a proper prefix of "mangled_cur".
  473. static const char anon_prefix[] = "_GLOBAL__N_";
  474. return (length > static_cast<int>(sizeof(anon_prefix) - 1) &&
  475. StrPrefix(RemainingInput(state), anon_prefix));
  476. }
  477. // Forward declarations of our parsing functions.
  478. static bool ParseMangledName(State *state);
  479. static bool ParseEncoding(State *state);
  480. static bool ParseName(State *state);
  481. static bool ParseUnscopedName(State *state);
  482. static bool ParseNestedName(State *state);
  483. static bool ParsePrefix(State *state);
  484. static bool ParseUnqualifiedName(State *state);
  485. static bool ParseSourceName(State *state);
  486. static bool ParseLocalSourceName(State *state);
  487. static bool ParseUnnamedTypeName(State *state);
  488. static bool ParseNumber(State *state, int *number_out);
  489. static bool ParseFloatNumber(State *state);
  490. static bool ParseSeqId(State *state);
  491. static bool ParseIdentifier(State *state, int length);
  492. static bool ParseOperatorName(State *state, int *arity);
  493. static bool ParseSpecialName(State *state);
  494. static bool ParseCallOffset(State *state);
  495. static bool ParseNVOffset(State *state);
  496. static bool ParseVOffset(State *state);
  497. static bool ParseCtorDtorName(State *state);
  498. static bool ParseDecltype(State *state);
  499. static bool ParseType(State *state);
  500. static bool ParseCVQualifiers(State *state);
  501. static bool ParseBuiltinType(State *state);
  502. static bool ParseFunctionType(State *state);
  503. static bool ParseBareFunctionType(State *state);
  504. static bool ParseClassEnumType(State *state);
  505. static bool ParseArrayType(State *state);
  506. static bool ParsePointerToMemberType(State *state);
  507. static bool ParseTemplateParam(State *state);
  508. static bool ParseTemplateTemplateParam(State *state);
  509. static bool ParseTemplateArgs(State *state);
  510. static bool ParseTemplateArg(State *state);
  511. static bool ParseBaseUnresolvedName(State *state);
  512. static bool ParseUnresolvedName(State *state);
  513. static bool ParseExpression(State *state);
  514. static bool ParseExprPrimary(State *state);
  515. static bool ParseExprCastValue(State *state);
  516. static bool ParseLocalName(State *state);
  517. static bool ParseLocalNameSuffix(State *state);
  518. static bool ParseDiscriminator(State *state);
  519. static bool ParseSubstitution(State *state, bool accept_std);
  520. // Implementation note: the following code is a straightforward
  521. // translation of the Itanium C++ ABI defined in BNF with a couple of
  522. // exceptions.
  523. //
  524. // - Support GNU extensions not defined in the Itanium C++ ABI
  525. // - <prefix> and <template-prefix> are combined to avoid infinite loop
  526. // - Reorder patterns to shorten the code
  527. // - Reorder patterns to give greedier functions precedence
  528. // We'll mark "Less greedy than" for these cases in the code
  529. //
  530. // Each parsing function changes the parse state and returns true on
  531. // success, or returns false and doesn't change the parse state (note:
  532. // the parse-steps counter increases regardless of success or failure).
  533. // To ensure that the parse state isn't changed in the latter case, we
  534. // save the original state before we call multiple parsing functions
  535. // consecutively with &&, and restore it if unsuccessful. See
  536. // ParseEncoding() as an example of this convention. We follow the
  537. // convention throughout the code.
  538. //
  539. // Originally we tried to do demangling without following the full ABI
  540. // syntax but it turned out we needed to follow the full syntax to
  541. // parse complicated cases like nested template arguments. Note that
  542. // implementing a full-fledged demangler isn't trivial (libiberty's
  543. // cp-demangle.c has +4300 lines).
  544. //
  545. // Note that (foo) in <(foo) ...> is a modifier to be ignored.
  546. //
  547. // Reference:
// - Itanium C++ ABI
// <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
  550. // <mangled-name> ::= _Z <encoding>
  551. static bool ParseMangledName(State *state) {
  552. ComplexityGuard guard(state);
  553. if (guard.IsTooComplex()) return false;
  554. return ParseTwoCharToken(state, "_Z") && ParseEncoding(state);
  555. }
  556. // <encoding> ::= <(function) name> <bare-function-type>
  557. // ::= <(data) name>
  558. // ::= <special-name>
  559. static bool ParseEncoding(State *state) {
  560. ComplexityGuard guard(state);
  561. if (guard.IsTooComplex()) return false;
  562. // Implementing the first two productions together as <name>
  563. // [<bare-function-type>] avoids exponential blowup of backtracking.
  564. //
  565. // Since Optional(...) can't fail, there's no need to copy the state for
  566. // backtracking.
  567. if (ParseName(state) && Optional(ParseBareFunctionType(state))) {
  568. return true;
  569. }
  570. if (ParseSpecialName(state)) {
  571. return true;
  572. }
  573. return false;
  574. }
  575. // <name> ::= <nested-name>
  576. // ::= <unscoped-template-name> <template-args>
  577. // ::= <unscoped-name>
  578. // ::= <local-name>
  579. static bool ParseName(State *state) {
  580. ComplexityGuard guard(state);
  581. if (guard.IsTooComplex()) return false;
  582. if (ParseNestedName(state) || ParseLocalName(state)) {
  583. return true;
  584. }
  585. // We reorganize the productions to avoid re-parsing unscoped names.
  586. // - Inline <unscoped-template-name> productions:
  587. // <name> ::= <substitution> <template-args>
  588. // ::= <unscoped-name> <template-args>
  589. // ::= <unscoped-name>
  590. // - Merge the two productions that start with unscoped-name:
  591. // <name> ::= <unscoped-name> [<template-args>]
  592. ParseState copy = state->parse_state;
  593. // "std<...>" isn't a valid name.
  594. if (ParseSubstitution(state, /*accept_std=*/false) &&
  595. ParseTemplateArgs(state)) {
  596. return true;
  597. }
  598. state->parse_state = copy;
  599. // Note there's no need to restore state after this since only the first
  600. // subparser can fail.
  601. return ParseUnscopedName(state) && Optional(ParseTemplateArgs(state));
  602. }
  603. // <unscoped-name> ::= <unqualified-name>
  604. // ::= St <unqualified-name>
  605. static bool ParseUnscopedName(State *state) {
  606. ComplexityGuard guard(state);
  607. if (guard.IsTooComplex()) return false;
  608. if (ParseUnqualifiedName(state)) {
  609. return true;
  610. }
  611. ParseState copy = state->parse_state;
  612. if (ParseTwoCharToken(state, "St") && MaybeAppend(state, "std::") &&
  613. ParseUnqualifiedName(state)) {
  614. return true;
  615. }
  616. state->parse_state = copy;
  617. return false;
  618. }
  619. // <ref-qualifer> ::= R // lvalue method reference qualifier
  620. // ::= O // rvalue method reference qualifier
  621. static inline bool ParseRefQualifier(State *state) {
  622. return ParseCharClass(state, "OR");
  623. }
  624. // <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
  625. // <unqualified-name> E
  626. // ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
  627. // <template-args> E
  628. static bool ParseNestedName(State *state) {
  629. ComplexityGuard guard(state);
  630. if (guard.IsTooComplex()) return false;
  631. ParseState copy = state->parse_state;
  632. if (ParseOneCharToken(state, 'N') && EnterNestedName(state) &&
  633. Optional(ParseCVQualifiers(state)) &&
  634. Optional(ParseRefQualifier(state)) && ParsePrefix(state) &&
  635. LeaveNestedName(state, copy.nest_level) &&
  636. ParseOneCharToken(state, 'E')) {
  637. return true;
  638. }
  639. state->parse_state = copy;
  640. return false;
  641. }
  642. // This part is tricky. If we literally translate them to code, we'll
  643. // end up infinite loop. Hence we merge them to avoid the case.
  644. //
  645. // <prefix> ::= <prefix> <unqualified-name>
  646. // ::= <template-prefix> <template-args>
  647. // ::= <template-param>
  648. // ::= <substitution>
  649. // ::= # empty
  650. // <template-prefix> ::= <prefix> <(template) unqualified-name>
  651. // ::= <template-param>
  652. // ::= <substitution>
  653. static bool ParsePrefix(State *state) {
  654. ComplexityGuard guard(state);
  655. if (guard.IsTooComplex()) return false;
  656. bool has_something = false;
  657. while (true) {
  658. MaybeAppendSeparator(state);
  659. if (ParseTemplateParam(state) ||
  660. ParseSubstitution(state, /*accept_std=*/true) ||
  661. ParseUnscopedName(state) ||
  662. (ParseOneCharToken(state, 'M') && ParseUnnamedTypeName(state))) {
  663. has_something = true;
  664. MaybeIncreaseNestLevel(state);
  665. continue;
  666. }
  667. MaybeCancelLastSeparator(state);
  668. if (has_something && ParseTemplateArgs(state)) {
  669. return ParsePrefix(state);
  670. } else {
  671. break;
  672. }
  673. }
  674. return true;
  675. }
  676. // <unqualified-name> ::= <operator-name>
  677. // ::= <ctor-dtor-name>
  678. // ::= <source-name>
  679. // ::= <local-source-name> // GCC extension; see below.
  680. // ::= <unnamed-type-name>
  681. static bool ParseUnqualifiedName(State *state) {
  682. ComplexityGuard guard(state);
  683. if (guard.IsTooComplex()) return false;
  684. return (ParseOperatorName(state, nullptr) || ParseCtorDtorName(state) ||
  685. ParseSourceName(state) || ParseLocalSourceName(state) ||
  686. ParseUnnamedTypeName(state));
  687. }
  688. // <source-name> ::= <positive length number> <identifier>
  689. static bool ParseSourceName(State *state) {
  690. ComplexityGuard guard(state);
  691. if (guard.IsTooComplex()) return false;
  692. ParseState copy = state->parse_state;
  693. int length = -1;
  694. if (ParseNumber(state, &length) && ParseIdentifier(state, length)) {
  695. return true;
  696. }
  697. state->parse_state = copy;
  698. return false;
  699. }
  700. // <local-source-name> ::= L <source-name> [<discriminator>]
  701. //
  702. // References:
  703. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=31775
  704. // https://gcc.gnu.org/viewcvs?view=rev&revision=124467
  705. static bool ParseLocalSourceName(State *state) {
  706. ComplexityGuard guard(state);
  707. if (guard.IsTooComplex()) return false;
  708. ParseState copy = state->parse_state;
  709. if (ParseOneCharToken(state, 'L') && ParseSourceName(state) &&
  710. Optional(ParseDiscriminator(state))) {
  711. return true;
  712. }
  713. state->parse_state = copy;
  714. return false;
  715. }
  716. // <unnamed-type-name> ::= Ut [<(nonnegative) number>] _
  717. // ::= <closure-type-name>
  718. // <closure-type-name> ::= Ul <lambda-sig> E [<(nonnegative) number>] _
  719. // <lambda-sig> ::= <(parameter) type>+
  720. static bool ParseUnnamedTypeName(State *state) {
  721. ComplexityGuard guard(state);
  722. if (guard.IsTooComplex()) return false;
  723. ParseState copy = state->parse_state;
  724. // Type's 1-based index n is encoded as { "", n == 1; itoa(n-2), otherwise }.
  725. // Optionally parse the encoded value into 'which' and add 2 to get the index.
  726. int which = -1;
  727. // Unnamed type local to function or class.
  728. if (ParseTwoCharToken(state, "Ut") && Optional(ParseNumber(state, &which)) &&
  729. which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
  730. ParseOneCharToken(state, '_')) {
  731. MaybeAppend(state, "{unnamed type#");
  732. MaybeAppendDecimal(state, 2 + which);
  733. MaybeAppend(state, "}");
  734. return true;
  735. }
  736. state->parse_state = copy;
  737. // Closure type.
  738. which = -1;
  739. if (ParseTwoCharToken(state, "Ul") && DisableAppend(state) &&
  740. OneOrMore(ParseType, state) && RestoreAppend(state, copy.append) &&
  741. ParseOneCharToken(state, 'E') && Optional(ParseNumber(state, &which)) &&
  742. which <= std::numeric_limits<int>::max() - 2 && // Don't overflow.
  743. ParseOneCharToken(state, '_')) {
  744. MaybeAppend(state, "{lambda()#");
  745. MaybeAppendDecimal(state, 2 + which);
  746. MaybeAppend(state, "}");
  747. return true;
  748. }
  749. state->parse_state = copy;
  750. return false;
  751. }
  752. // <number> ::= [n] <non-negative decimal integer>
  753. // If "number_out" is non-null, then *number_out is set to the value of the
  754. // parsed number on success.
  755. static bool ParseNumber(State *state, int *number_out) {
  756. ComplexityGuard guard(state);
  757. if (guard.IsTooComplex()) return false;
  758. bool negative = false;
  759. if (ParseOneCharToken(state, 'n')) {
  760. negative = true;
  761. }
  762. const char *p = RemainingInput(state);
  763. uint64_t number = 0;
  764. for (; *p != '\0'; ++p) {
  765. if (IsDigit(*p)) {
  766. number = number * 10 + (*p - '0');
  767. } else {
  768. break;
  769. }
  770. }
  771. // Apply the sign with uint64_t arithmetic so overflows aren't UB. Gives
  772. // "incorrect" results for out-of-range inputs, but negative values only
  773. // appear for literals, which aren't printed.
  774. if (negative) {
  775. number = ~number + 1;
  776. }
  777. if (p != RemainingInput(state)) { // Conversion succeeded.
  778. state->parse_state.mangled_idx += p - RemainingInput(state);
  779. if (number_out != nullptr) {
  780. // Note: possibly truncate "number".
  781. *number_out = number;
  782. }
  783. return true;
  784. }
  785. return false;
  786. }
  787. // Floating-point literals are encoded using a fixed-length lowercase
  788. // hexadecimal string.
  789. static bool ParseFloatNumber(State *state) {
  790. ComplexityGuard guard(state);
  791. if (guard.IsTooComplex()) return false;
  792. const char *p = RemainingInput(state);
  793. for (; *p != '\0'; ++p) {
  794. if (!IsDigit(*p) && !(*p >= 'a' && *p <= 'f')) {
  795. break;
  796. }
  797. }
  798. if (p != RemainingInput(state)) { // Conversion succeeded.
  799. state->parse_state.mangled_idx += p - RemainingInput(state);
  800. return true;
  801. }
  802. return false;
  803. }
  804. // The <seq-id> is a sequence number in base 36,
  805. // using digits and upper case letters
  806. static bool ParseSeqId(State *state) {
  807. ComplexityGuard guard(state);
  808. if (guard.IsTooComplex()) return false;
  809. const char *p = RemainingInput(state);
  810. for (; *p != '\0'; ++p) {
  811. if (!IsDigit(*p) && !(*p >= 'A' && *p <= 'Z')) {
  812. break;
  813. }
  814. }
  815. if (p != RemainingInput(state)) { // Conversion succeeded.
  816. state->parse_state.mangled_idx += p - RemainingInput(state);
  817. return true;
  818. }
  819. return false;
  820. }
  821. // <identifier> ::= <unqualified source code identifier> (of given length)
  822. static bool ParseIdentifier(State *state, int length) {
  823. ComplexityGuard guard(state);
  824. if (guard.IsTooComplex()) return false;
  825. if (length < 0 || !AtLeastNumCharsRemaining(RemainingInput(state), length)) {
  826. return false;
  827. }
  828. if (IdentifierIsAnonymousNamespace(state, length)) {
  829. MaybeAppend(state, "(anonymous namespace)");
  830. } else {
  831. MaybeAppendWithLength(state, RemainingInput(state), length);
  832. }
  833. state->parse_state.mangled_idx += length;
  834. return true;
  835. }
  836. // <operator-name> ::= nw, and other two letters cases
  837. // ::= cv <type> # (cast)
  838. // ::= v <digit> <source-name> # vendor extended operator
  839. static bool ParseOperatorName(State *state, int *arity) {
  840. ComplexityGuard guard(state);
  841. if (guard.IsTooComplex()) return false;
  842. if (!AtLeastNumCharsRemaining(RemainingInput(state), 2)) {
  843. return false;
  844. }
  845. // First check with "cv" (cast) case.
  846. ParseState copy = state->parse_state;
  847. if (ParseTwoCharToken(state, "cv") && MaybeAppend(state, "operator ") &&
  848. EnterNestedName(state) && ParseType(state) &&
  849. LeaveNestedName(state, copy.nest_level)) {
  850. if (arity != nullptr) {
  851. *arity = 1;
  852. }
  853. return true;
  854. }
  855. state->parse_state = copy;
  856. // Then vendor extended operators.
  857. if (ParseOneCharToken(state, 'v') && ParseDigit(state, arity) &&
  858. ParseSourceName(state)) {
  859. return true;
  860. }
  861. state->parse_state = copy;
  862. // Other operator names should start with a lower alphabet followed
  863. // by a lower/upper alphabet.
  864. if (!(IsLower(RemainingInput(state)[0]) &&
  865. IsAlpha(RemainingInput(state)[1]))) {
  866. return false;
  867. }
  868. // We may want to perform a binary search if we really need speed.
  869. const AbbrevPair *p;
  870. for (p = kOperatorList; p->abbrev != nullptr; ++p) {
  871. if (RemainingInput(state)[0] == p->abbrev[0] &&
  872. RemainingInput(state)[1] == p->abbrev[1]) {
  873. if (arity != nullptr) {
  874. *arity = p->arity;
  875. }
  876. MaybeAppend(state, "operator");
  877. if (IsLower(*p->real_name)) { // new, delete, etc.
  878. MaybeAppend(state, " ");
  879. }
  880. MaybeAppend(state, p->real_name);
  881. state->parse_state.mangled_idx += 2;
  882. return true;
  883. }
  884. }
  885. return false;
  886. }
  887. // <special-name> ::= TV <type>
  888. // ::= TT <type>
  889. // ::= TI <type>
  890. // ::= TS <type>
  891. // ::= TH <type> # thread-local
  892. // ::= Tc <call-offset> <call-offset> <(base) encoding>
  893. // ::= GV <(object) name>
  894. // ::= T <call-offset> <(base) encoding>
  895. // G++ extensions:
  896. // ::= TC <type> <(offset) number> _ <(base) type>
  897. // ::= TF <type>
  898. // ::= TJ <type>
  899. // ::= GR <name>
  900. // ::= GA <encoding>
  901. // ::= Th <call-offset> <(base) encoding>
  902. // ::= Tv <call-offset> <(base) encoding>
  903. //
  904. // Note: we don't care much about them since they don't appear in
  905. // stack traces. The are special data.
  906. static bool ParseSpecialName(State *state) {
  907. ComplexityGuard guard(state);
  908. if (guard.IsTooComplex()) return false;
  909. ParseState copy = state->parse_state;
  910. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "VTISH") &&
  911. ParseType(state)) {
  912. return true;
  913. }
  914. state->parse_state = copy;
  915. if (ParseTwoCharToken(state, "Tc") && ParseCallOffset(state) &&
  916. ParseCallOffset(state) && ParseEncoding(state)) {
  917. return true;
  918. }
  919. state->parse_state = copy;
  920. if (ParseTwoCharToken(state, "GV") && ParseName(state)) {
  921. return true;
  922. }
  923. state->parse_state = copy;
  924. if (ParseOneCharToken(state, 'T') && ParseCallOffset(state) &&
  925. ParseEncoding(state)) {
  926. return true;
  927. }
  928. state->parse_state = copy;
  929. // G++ extensions
  930. if (ParseTwoCharToken(state, "TC") && ParseType(state) &&
  931. ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
  932. DisableAppend(state) && ParseType(state)) {
  933. RestoreAppend(state, copy.append);
  934. return true;
  935. }
  936. state->parse_state = copy;
  937. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "FJ") &&
  938. ParseType(state)) {
  939. return true;
  940. }
  941. state->parse_state = copy;
  942. if (ParseTwoCharToken(state, "GR") && ParseName(state)) {
  943. return true;
  944. }
  945. state->parse_state = copy;
  946. if (ParseTwoCharToken(state, "GA") && ParseEncoding(state)) {
  947. return true;
  948. }
  949. state->parse_state = copy;
  950. if (ParseOneCharToken(state, 'T') && ParseCharClass(state, "hv") &&
  951. ParseCallOffset(state) && ParseEncoding(state)) {
  952. return true;
  953. }
  954. state->parse_state = copy;
  955. return false;
  956. }
  957. // <call-offset> ::= h <nv-offset> _
  958. // ::= v <v-offset> _
  959. static bool ParseCallOffset(State *state) {
  960. ComplexityGuard guard(state);
  961. if (guard.IsTooComplex()) return false;
  962. ParseState copy = state->parse_state;
  963. if (ParseOneCharToken(state, 'h') && ParseNVOffset(state) &&
  964. ParseOneCharToken(state, '_')) {
  965. return true;
  966. }
  967. state->parse_state = copy;
  968. if (ParseOneCharToken(state, 'v') && ParseVOffset(state) &&
  969. ParseOneCharToken(state, '_')) {
  970. return true;
  971. }
  972. state->parse_state = copy;
  973. return false;
  974. }
  975. // <nv-offset> ::= <(offset) number>
  976. static bool ParseNVOffset(State *state) {
  977. ComplexityGuard guard(state);
  978. if (guard.IsTooComplex()) return false;
  979. return ParseNumber(state, nullptr);
  980. }
  981. // <v-offset> ::= <(offset) number> _ <(virtual offset) number>
  982. static bool ParseVOffset(State *state) {
  983. ComplexityGuard guard(state);
  984. if (guard.IsTooComplex()) return false;
  985. ParseState copy = state->parse_state;
  986. if (ParseNumber(state, nullptr) && ParseOneCharToken(state, '_') &&
  987. ParseNumber(state, nullptr)) {
  988. return true;
  989. }
  990. state->parse_state = copy;
  991. return false;
  992. }
  993. // <ctor-dtor-name> ::= C1 | C2 | C3 | CI1 <base-class-type> | CI2
  994. // <base-class-type>
  995. // ::= D0 | D1 | D2
  996. // # GCC extensions: "unified" constructor/destructor. See
  997. // #
  998. // https://github.com/gcc-mirror/gcc/blob/7ad17b583c3643bd4557f29b8391ca7ef08391f5/gcc/cp/mangle.c#L1847
  999. // ::= C4 | D4
  1000. static bool ParseCtorDtorName(State *state) {
  1001. ComplexityGuard guard(state);
  1002. if (guard.IsTooComplex()) return false;
  1003. ParseState copy = state->parse_state;
  1004. if (ParseOneCharToken(state, 'C')) {
  1005. if (ParseCharClass(state, "1234")) {
  1006. const char *const prev_name =
  1007. state->out + state->parse_state.prev_name_idx;
  1008. MaybeAppendWithLength(state, prev_name,
  1009. state->parse_state.prev_name_length);
  1010. return true;
  1011. } else if (ParseOneCharToken(state, 'I') && ParseCharClass(state, "12") &&
  1012. ParseClassEnumType(state)) {
  1013. return true;
  1014. }
  1015. }
  1016. state->parse_state = copy;
  1017. if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "0124")) {
  1018. const char *const prev_name = state->out + state->parse_state.prev_name_idx;
  1019. MaybeAppend(state, "~");
  1020. MaybeAppendWithLength(state, prev_name,
  1021. state->parse_state.prev_name_length);
  1022. return true;
  1023. }
  1024. state->parse_state = copy;
  1025. return false;
  1026. }
  1027. // <decltype> ::= Dt <expression> E # decltype of an id-expression or class
  1028. // # member access (C++0x)
  1029. // ::= DT <expression> E # decltype of an expression (C++0x)
  1030. static bool ParseDecltype(State *state) {
  1031. ComplexityGuard guard(state);
  1032. if (guard.IsTooComplex()) return false;
  1033. ParseState copy = state->parse_state;
  1034. if (ParseOneCharToken(state, 'D') && ParseCharClass(state, "tT") &&
  1035. ParseExpression(state) && ParseOneCharToken(state, 'E')) {
  1036. return true;
  1037. }
  1038. state->parse_state = copy;
  1039. return false;
  1040. }
  1041. // <type> ::= <CV-qualifiers> <type>
  1042. // ::= P <type> # pointer-to
  1043. // ::= R <type> # reference-to
  1044. // ::= O <type> # rvalue reference-to (C++0x)
  1045. // ::= C <type> # complex pair (C 2000)
  1046. // ::= G <type> # imaginary (C 2000)
  1047. // ::= U <source-name> <type> # vendor extended type qualifier
  1048. // ::= <builtin-type>
  1049. // ::= <function-type>
  1050. // ::= <class-enum-type> # note: just an alias for <name>
  1051. // ::= <array-type>
  1052. // ::= <pointer-to-member-type>
  1053. // ::= <template-template-param> <template-args>
  1054. // ::= <template-param>
  1055. // ::= <decltype>
  1056. // ::= <substitution>
  1057. // ::= Dp <type> # pack expansion of (C++0x)
  1058. // ::= Dv <num-elems> _ # GNU vector extension
  1059. //
  1060. static bool ParseType(State *state) {
  1061. ComplexityGuard guard(state);
  1062. if (guard.IsTooComplex()) return false;
  1063. ParseState copy = state->parse_state;
  1064. // We should check CV-qualifers, and PRGC things first.
  1065. //
  1066. // CV-qualifiers overlap with some operator names, but an operator name is not
  1067. // valid as a type. To avoid an ambiguity that can lead to exponential time
  1068. // complexity, refuse to backtrack the CV-qualifiers.
  1069. //
  1070. // _Z4aoeuIrMvvE
  1071. // => _Z 4aoeuI rM v v E
  1072. // aoeu<operator%=, void, void>
  1073. // => _Z 4aoeuI r Mv v E
  1074. // aoeu<void void::* restrict>
  1075. //
  1076. // By consuming the CV-qualifiers first, the former parse is disabled.
  1077. if (ParseCVQualifiers(state)) {
  1078. const bool result = ParseType(state);
  1079. if (!result) state->parse_state = copy;
  1080. return result;
  1081. }
  1082. state->parse_state = copy;
  1083. // Similarly, these tag characters can overlap with other <name>s resulting in
  1084. // two different parse prefixes that land on <template-args> in the same
  1085. // place, such as "C3r1xI...". So, disable the "ctor-name = C3" parse by
  1086. // refusing to backtrack the tag characters.
  1087. if (ParseCharClass(state, "OPRCG")) {
  1088. const bool result = ParseType(state);
  1089. if (!result) state->parse_state = copy;
  1090. return result;
  1091. }
  1092. state->parse_state = copy;
  1093. if (ParseTwoCharToken(state, "Dp") && ParseType(state)) {
  1094. return true;
  1095. }
  1096. state->parse_state = copy;
  1097. if (ParseOneCharToken(state, 'U') && ParseSourceName(state) &&
  1098. ParseType(state)) {
  1099. return true;
  1100. }
  1101. state->parse_state = copy;
  1102. if (ParseBuiltinType(state) || ParseFunctionType(state) ||
  1103. ParseClassEnumType(state) || ParseArrayType(state) ||
  1104. ParsePointerToMemberType(state) || ParseDecltype(state) ||
  1105. // "std" on its own isn't a type.
  1106. ParseSubstitution(state, /*accept_std=*/false)) {
  1107. return true;
  1108. }
  1109. if (ParseTemplateTemplateParam(state) && ParseTemplateArgs(state)) {
  1110. return true;
  1111. }
  1112. state->parse_state = copy;
  1113. // Less greedy than <template-template-param> <template-args>.
  1114. if (ParseTemplateParam(state)) {
  1115. return true;
  1116. }
  1117. if (ParseTwoCharToken(state, "Dv") && ParseNumber(state, nullptr) &&
  1118. ParseOneCharToken(state, '_')) {
  1119. return true;
  1120. }
  1121. state->parse_state = copy;
  1122. return false;
  1123. }
  1124. // <CV-qualifiers> ::= [r] [V] [K]
  1125. // We don't allow empty <CV-qualifiers> to avoid infinite loop in
  1126. // ParseType().
  1127. static bool ParseCVQualifiers(State *state) {
  1128. ComplexityGuard guard(state);
  1129. if (guard.IsTooComplex()) return false;
  1130. int num_cv_qualifiers = 0;
  1131. num_cv_qualifiers += ParseOneCharToken(state, 'r');
  1132. num_cv_qualifiers += ParseOneCharToken(state, 'V');
  1133. num_cv_qualifiers += ParseOneCharToken(state, 'K');
  1134. return num_cv_qualifiers > 0;
  1135. }
  1136. // <builtin-type> ::= v, etc. # single-character builtin types
  1137. // ::= u <source-name>
  1138. // ::= Dd, etc. # two-character builtin types
  1139. //
  1140. // Not supported:
  1141. // ::= DF <number> _ # _FloatN (N bits)
  1142. //
  1143. static bool ParseBuiltinType(State *state) {
  1144. ComplexityGuard guard(state);
  1145. if (guard.IsTooComplex()) return false;
  1146. const AbbrevPair *p;
  1147. for (p = kBuiltinTypeList; p->abbrev != nullptr; ++p) {
  1148. // Guaranteed only 1- or 2-character strings in kBuiltinTypeList.
  1149. if (p->abbrev[1] == '\0') {
  1150. if (ParseOneCharToken(state, p->abbrev[0])) {
  1151. MaybeAppend(state, p->real_name);
  1152. return true;
  1153. }
  1154. } else if (p->abbrev[2] == '\0' && ParseTwoCharToken(state, p->abbrev)) {
  1155. MaybeAppend(state, p->real_name);
  1156. return true;
  1157. }
  1158. }
  1159. ParseState copy = state->parse_state;
  1160. if (ParseOneCharToken(state, 'u') && ParseSourceName(state)) {
  1161. return true;
  1162. }
  1163. state->parse_state = copy;
  1164. return false;
  1165. }
  1166. // <exception-spec> ::= Do # non-throwing
  1167. // exception-specification (e.g.,
  1168. // noexcept, throw())
  1169. // ::= DO <expression> E # computed (instantiation-dependent)
  1170. // noexcept
  1171. // ::= Dw <type>+ E # dynamic exception specification
  1172. // with instantiation-dependent types
  1173. static bool ParseExceptionSpec(State *state) {
  1174. ComplexityGuard guard(state);
  1175. if (guard.IsTooComplex()) return false;
  1176. if (ParseTwoCharToken(state, "Do")) return true;
  1177. ParseState copy = state->parse_state;
  1178. if (ParseTwoCharToken(state, "DO") && ParseExpression(state) &&
  1179. ParseOneCharToken(state, 'E')) {
  1180. return true;
  1181. }
  1182. state->parse_state = copy;
  1183. if (ParseTwoCharToken(state, "Dw") && OneOrMore(ParseType, state) &&
  1184. ParseOneCharToken(state, 'E')) {
  1185. return true;
  1186. }
  1187. state->parse_state = copy;
  1188. return false;
  1189. }
  1190. // <function-type> ::= [exception-spec] F [Y] <bare-function-type> [O] E
  1191. static bool ParseFunctionType(State *state) {
  1192. ComplexityGuard guard(state);
  1193. if (guard.IsTooComplex()) return false;
  1194. ParseState copy = state->parse_state;
  1195. if (Optional(ParseExceptionSpec(state)) && ParseOneCharToken(state, 'F') &&
  1196. Optional(ParseOneCharToken(state, 'Y')) && ParseBareFunctionType(state) &&
  1197. Optional(ParseOneCharToken(state, 'O')) &&
  1198. ParseOneCharToken(state, 'E')) {
  1199. return true;
  1200. }
  1201. state->parse_state = copy;
  1202. return false;
  1203. }
  1204. // <bare-function-type> ::= <(signature) type>+
  1205. static bool ParseBareFunctionType(State *state) {
  1206. ComplexityGuard guard(state);
  1207. if (guard.IsTooComplex()) return false;
  1208. ParseState copy = state->parse_state;
  1209. DisableAppend(state);
  1210. if (OneOrMore(ParseType, state)) {
  1211. RestoreAppend(state, copy.append);
  1212. MaybeAppend(state, "()");
  1213. return true;
  1214. }
  1215. state->parse_state = copy;
  1216. return false;
  1217. }
  1218. // <class-enum-type> ::= <name>
  1219. static bool ParseClassEnumType(State *state) {
  1220. ComplexityGuard guard(state);
  1221. if (guard.IsTooComplex()) return false;
  1222. return ParseName(state);
  1223. }
  1224. // <array-type> ::= A <(positive dimension) number> _ <(element) type>
  1225. // ::= A [<(dimension) expression>] _ <(element) type>
  1226. static bool ParseArrayType(State *state) {
  1227. ComplexityGuard guard(state);
  1228. if (guard.IsTooComplex()) return false;
  1229. ParseState copy = state->parse_state;
  1230. if (ParseOneCharToken(state, 'A') && ParseNumber(state, nullptr) &&
  1231. ParseOneCharToken(state, '_') && ParseType(state)) {
  1232. return true;
  1233. }
  1234. state->parse_state = copy;
  1235. if (ParseOneCharToken(state, 'A') && Optional(ParseExpression(state)) &&
  1236. ParseOneCharToken(state, '_') && ParseType(state)) {
  1237. return true;
  1238. }
  1239. state->parse_state = copy;
  1240. return false;
  1241. }
  1242. // <pointer-to-member-type> ::= M <(class) type> <(member) type>
  1243. static bool ParsePointerToMemberType(State *state) {
  1244. ComplexityGuard guard(state);
  1245. if (guard.IsTooComplex()) return false;
  1246. ParseState copy = state->parse_state;
  1247. if (ParseOneCharToken(state, 'M') && ParseType(state) && ParseType(state)) {
  1248. return true;
  1249. }
  1250. state->parse_state = copy;
  1251. return false;
  1252. }
  1253. // <template-param> ::= T_
  1254. // ::= T <parameter-2 non-negative number> _
  1255. static bool ParseTemplateParam(State *state) {
  1256. ComplexityGuard guard(state);
  1257. if (guard.IsTooComplex()) return false;
  1258. if (ParseTwoCharToken(state, "T_")) {
  1259. MaybeAppend(state, "?"); // We don't support template substitutions.
  1260. return true;
  1261. }
  1262. ParseState copy = state->parse_state;
  1263. if (ParseOneCharToken(state, 'T') && ParseNumber(state, nullptr) &&
  1264. ParseOneCharToken(state, '_')) {
  1265. MaybeAppend(state, "?"); // We don't support template substitutions.
  1266. return true;
  1267. }
  1268. state->parse_state = copy;
  1269. return false;
  1270. }
  1271. // <template-template-param> ::= <template-param>
  1272. // ::= <substitution>
  1273. static bool ParseTemplateTemplateParam(State *state) {
  1274. ComplexityGuard guard(state);
  1275. if (guard.IsTooComplex()) return false;
  1276. return (ParseTemplateParam(state) ||
  1277. // "std" on its own isn't a template.
  1278. ParseSubstitution(state, /*accept_std=*/false));
  1279. }
  1280. // <template-args> ::= I <template-arg>+ E
  1281. static bool ParseTemplateArgs(State *state) {
  1282. ComplexityGuard guard(state);
  1283. if (guard.IsTooComplex()) return false;
  1284. ParseState copy = state->parse_state;
  1285. DisableAppend(state);
  1286. if (ParseOneCharToken(state, 'I') && OneOrMore(ParseTemplateArg, state) &&
  1287. ParseOneCharToken(state, 'E')) {
  1288. RestoreAppend(state, copy.append);
  1289. MaybeAppend(state, "<>");
  1290. return true;
  1291. }
  1292. state->parse_state = copy;
  1293. return false;
  1294. }
  1295. // <template-arg> ::= <type>
  1296. // ::= <expr-primary>
  1297. // ::= J <template-arg>* E # argument pack
  1298. // ::= X <expression> E
  1299. static bool ParseTemplateArg(State *state) {
  1300. ComplexityGuard guard(state);
  1301. if (guard.IsTooComplex()) return false;
  1302. ParseState copy = state->parse_state;
  1303. if (ParseOneCharToken(state, 'J') && ZeroOrMore(ParseTemplateArg, state) &&
  1304. ParseOneCharToken(state, 'E')) {
  1305. return true;
  1306. }
  1307. state->parse_state = copy;
  1308. // There can be significant overlap between the following leading to
  1309. // exponential backtracking:
  1310. //
  1311. // <expr-primary> ::= L <type> <expr-cast-value> E
  1312. // e.g. L 2xxIvE 1 E
  1313. // <type> ==> <local-source-name> <template-args>
  1314. // e.g. L 2xx IvE
  1315. //
  1316. // This means parsing an entire <type> twice, and <type> can contain
  1317. // <template-arg>, so this can generate exponential backtracking. There is
  1318. // only overlap when the remaining input starts with "L <source-name>", so
  1319. // parse all cases that can start this way jointly to share the common prefix.
  1320. //
  1321. // We have:
  1322. //
  1323. // <template-arg> ::= <type>
  1324. // ::= <expr-primary>
  1325. //
  1326. // First, drop all the productions of <type> that must start with something
  1327. // other than 'L'. All that's left is <class-enum-type>; inline it.
  1328. //
  1329. // <type> ::= <nested-name> # starts with 'N'
  1330. // ::= <unscoped-name>
  1331. // ::= <unscoped-template-name> <template-args>
  1332. // ::= <local-name> # starts with 'Z'
  1333. //
  1334. // Drop and inline again:
  1335. //
  1336. // <type> ::= <unscoped-name>
  1337. // ::= <unscoped-name> <template-args>
  1338. // ::= <substitution> <template-args> # starts with 'S'
  1339. //
  1340. // Merge the first two, inline <unscoped-name>, drop last:
  1341. //
  1342. // <type> ::= <unqualified-name> [<template-args>]
  1343. // ::= St <unqualified-name> [<template-args>] # starts with 'S'
  1344. //
  1345. // Drop and inline:
  1346. //
  1347. // <type> ::= <operator-name> [<template-args>] # starts with lowercase
  1348. // ::= <ctor-dtor-name> [<template-args>] # starts with 'C' or 'D'
  1349. // ::= <source-name> [<template-args>] # starts with digit
  1350. // ::= <local-source-name> [<template-args>]
  1351. // ::= <unnamed-type-name> [<template-args>] # starts with 'U'
  1352. //
  1353. // One more time:
  1354. //
  1355. // <type> ::= L <source-name> [<template-args>]
  1356. //
  1357. // Likewise with <expr-primary>:
  1358. //
  1359. // <expr-primary> ::= L <type> <expr-cast-value> E
  1360. // ::= LZ <encoding> E # cannot overlap; drop
  1361. // ::= L <mangled_name> E # cannot overlap; drop
  1362. //
  1363. // By similar reasoning as shown above, the only <type>s starting with
  1364. // <source-name> are "<source-name> [<template-args>]". Inline this.
  1365. //
  1366. // <expr-primary> ::= L <source-name> [<template-args>] <expr-cast-value> E
  1367. //
  1368. // Now inline both of these into <template-arg>:
  1369. //
  1370. // <template-arg> ::= L <source-name> [<template-args>]
  1371. // ::= L <source-name> [<template-args>] <expr-cast-value> E
  1372. //
  1373. // Merge them and we're done:
  1374. // <template-arg>
  1375. // ::= L <source-name> [<template-args>] [<expr-cast-value> E]
  1376. if (ParseLocalSourceName(state) && Optional(ParseTemplateArgs(state))) {
  1377. copy = state->parse_state;
  1378. if (ParseExprCastValue(state) && ParseOneCharToken(state, 'E')) {
  1379. return true;
  1380. }
  1381. state->parse_state = copy;
  1382. return true;
  1383. }
  1384. // Now that the overlapping cases can't reach this code, we can safely call
  1385. // both of these.
  1386. if (ParseType(state) || ParseExprPrimary(state)) {
  1387. return true;
  1388. }
  1389. state->parse_state = copy;
  1390. if (ParseOneCharToken(state, 'X') && ParseExpression(state) &&
  1391. ParseOneCharToken(state, 'E')) {
  1392. return true;
  1393. }
  1394. state->parse_state = copy;
  1395. return false;
  1396. }
  1397. // <unresolved-type> ::= <template-param> [<template-args>]
  1398. // ::= <decltype>
  1399. // ::= <substitution>
  1400. static inline bool ParseUnresolvedType(State *state) {
  1401. // No ComplexityGuard because we don't copy the state in this stack frame.
  1402. return (ParseTemplateParam(state) && Optional(ParseTemplateArgs(state))) ||
  1403. ParseDecltype(state) || ParseSubstitution(state, /*accept_std=*/false);
  1404. }
  1405. // <simple-id> ::= <source-name> [<template-args>]
  1406. static inline bool ParseSimpleId(State *state) {
  1407. // No ComplexityGuard because we don't copy the state in this stack frame.
  1408. // Note: <simple-id> cannot be followed by a parameter pack; see comment in
  1409. // ParseUnresolvedType.
  1410. return ParseSourceName(state) && Optional(ParseTemplateArgs(state));
  1411. }
  1412. // <base-unresolved-name> ::= <source-name> [<template-args>]
  1413. // ::= on <operator-name> [<template-args>]
  1414. // ::= dn <destructor-name>
  1415. static bool ParseBaseUnresolvedName(State *state) {
  1416. ComplexityGuard guard(state);
  1417. if (guard.IsTooComplex()) return false;
  1418. if (ParseSimpleId(state)) {
  1419. return true;
  1420. }
  1421. ParseState copy = state->parse_state;
  1422. if (ParseTwoCharToken(state, "on") && ParseOperatorName(state, nullptr) &&
  1423. Optional(ParseTemplateArgs(state))) {
  1424. return true;
  1425. }
  1426. state->parse_state = copy;
  1427. if (ParseTwoCharToken(state, "dn") &&
  1428. (ParseUnresolvedType(state) || ParseSimpleId(state))) {
  1429. return true;
  1430. }
  1431. state->parse_state = copy;
  1432. return false;
  1433. }
  1434. // <unresolved-name> ::= [gs] <base-unresolved-name>
  1435. // ::= sr <unresolved-type> <base-unresolved-name>
  1436. // ::= srN <unresolved-type> <unresolved-qualifier-level>+ E
  1437. // <base-unresolved-name>
  1438. // ::= [gs] sr <unresolved-qualifier-level>+ E
  1439. // <base-unresolved-name>
  1440. static bool ParseUnresolvedName(State *state) {
  1441. ComplexityGuard guard(state);
  1442. if (guard.IsTooComplex()) return false;
  1443. ParseState copy = state->parse_state;
  1444. if (Optional(ParseTwoCharToken(state, "gs")) &&
  1445. ParseBaseUnresolvedName(state)) {
  1446. return true;
  1447. }
  1448. state->parse_state = copy;
  1449. if (ParseTwoCharToken(state, "sr") && ParseUnresolvedType(state) &&
  1450. ParseBaseUnresolvedName(state)) {
  1451. return true;
  1452. }
  1453. state->parse_state = copy;
  1454. if (ParseTwoCharToken(state, "sr") && ParseOneCharToken(state, 'N') &&
  1455. ParseUnresolvedType(state) &&
  1456. OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
  1457. ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
  1458. return true;
  1459. }
  1460. state->parse_state = copy;
  1461. if (Optional(ParseTwoCharToken(state, "gs")) &&
  1462. ParseTwoCharToken(state, "sr") &&
  1463. OneOrMore(/* <unresolved-qualifier-level> ::= */ ParseSimpleId, state) &&
  1464. ParseOneCharToken(state, 'E') && ParseBaseUnresolvedName(state)) {
  1465. return true;
  1466. }
  1467. state->parse_state = copy;
  1468. return false;
  1469. }
  1470. // <expression> ::= <1-ary operator-name> <expression>
  1471. // ::= <2-ary operator-name> <expression> <expression>
  1472. // ::= <3-ary operator-name> <expression> <expression> <expression>
  1473. // ::= cl <expression>+ E
  1474. // ::= cp <simple-id> <expression>* E # Clang-specific.
  1475. // ::= cv <type> <expression> # type (expression)
  1476. // ::= cv <type> _ <expression>* E # type (expr-list)
  1477. // ::= st <type>
  1478. // ::= <template-param>
  1479. // ::= <function-param>
  1480. // ::= <expr-primary>
  1481. // ::= dt <expression> <unresolved-name> # expr.name
  1482. // ::= pt <expression> <unresolved-name> # expr->name
  1483. // ::= sp <expression> # argument pack expansion
  1484. // ::= sr <type> <unqualified-name> <template-args>
  1485. // ::= sr <type> <unqualified-name>
  1486. // <function-param> ::= fp <(top-level) CV-qualifiers> _
  1487. // ::= fp <(top-level) CV-qualifiers> <number> _
  1488. // ::= fL <number> p <(top-level) CV-qualifiers> _
  1489. // ::= fL <number> p <(top-level) CV-qualifiers> <number> _
  1490. static bool ParseExpression(State *state) {
  1491. ComplexityGuard guard(state);
  1492. if (guard.IsTooComplex()) return false;
  1493. if (ParseTemplateParam(state) || ParseExprPrimary(state)) {
  1494. return true;
  1495. }
  1496. ParseState copy = state->parse_state;
  1497. // Object/function call expression.
  1498. if (ParseTwoCharToken(state, "cl") && OneOrMore(ParseExpression, state) &&
  1499. ParseOneCharToken(state, 'E')) {
  1500. return true;
  1501. }
  1502. state->parse_state = copy;
  1503. // Clang-specific "cp <simple-id> <expression>* E"
  1504. // https://clang.llvm.org/doxygen/ItaniumMangle_8cpp_source.html#l04338
  1505. if (ParseTwoCharToken(state, "cp") && ParseSimpleId(state) &&
  1506. ZeroOrMore(ParseExpression, state) && ParseOneCharToken(state, 'E')) {
  1507. return true;
  1508. }
  1509. state->parse_state = copy;
  1510. // Function-param expression (level 0).
  1511. if (ParseTwoCharToken(state, "fp") && Optional(ParseCVQualifiers(state)) &&
  1512. Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
  1513. return true;
  1514. }
  1515. state->parse_state = copy;
  1516. // Function-param expression (level 1+).
  1517. if (ParseTwoCharToken(state, "fL") && Optional(ParseNumber(state, nullptr)) &&
  1518. ParseOneCharToken(state, 'p') && Optional(ParseCVQualifiers(state)) &&
  1519. Optional(ParseNumber(state, nullptr)) && ParseOneCharToken(state, '_')) {
  1520. return true;
  1521. }
  1522. state->parse_state = copy;
  1523. // Parse the conversion expressions jointly to avoid re-parsing the <type> in
  1524. // their common prefix. Parsed as:
  1525. // <expression> ::= cv <type> <conversion-args>
  1526. // <conversion-args> ::= _ <expression>* E
  1527. // ::= <expression>
  1528. //
  1529. // Also don't try ParseOperatorName after seeing "cv", since ParseOperatorName
  1530. // also needs to accept "cv <type>" in other contexts.
  1531. if (ParseTwoCharToken(state, "cv")) {
  1532. if (ParseType(state)) {
  1533. ParseState copy2 = state->parse_state;
  1534. if (ParseOneCharToken(state, '_') && ZeroOrMore(ParseExpression, state) &&
  1535. ParseOneCharToken(state, 'E')) {
  1536. return true;
  1537. }
  1538. state->parse_state = copy2;
  1539. if (ParseExpression(state)) {
  1540. return true;
  1541. }
  1542. }
  1543. } else {
  1544. // Parse unary, binary, and ternary operator expressions jointly, taking
  1545. // care not to re-parse subexpressions repeatedly. Parse like:
  1546. // <expression> ::= <operator-name> <expression>
  1547. // [<one-to-two-expressions>]
  1548. // <one-to-two-expressions> ::= <expression> [<expression>]
  1549. int arity = -1;
  1550. if (ParseOperatorName(state, &arity) &&
  1551. arity > 0 && // 0 arity => disabled.
  1552. (arity < 3 || ParseExpression(state)) &&
  1553. (arity < 2 || ParseExpression(state)) &&
  1554. (arity < 1 || ParseExpression(state))) {
  1555. return true;
  1556. }
  1557. }
  1558. state->parse_state = copy;
  1559. // sizeof type
  1560. if (ParseTwoCharToken(state, "st") && ParseType(state)) {
  1561. return true;
  1562. }
  1563. state->parse_state = copy;
  1564. // Object and pointer member access expressions.
  1565. if ((ParseTwoCharToken(state, "dt") || ParseTwoCharToken(state, "pt")) &&
  1566. ParseExpression(state) && ParseType(state)) {
  1567. return true;
  1568. }
  1569. state->parse_state = copy;
  1570. // Pointer-to-member access expressions. This parses the same as a binary
  1571. // operator, but it's implemented separately because "ds" shouldn't be
  1572. // accepted in other contexts that parse an operator name.
  1573. if (ParseTwoCharToken(state, "ds") && ParseExpression(state) &&
  1574. ParseExpression(state)) {
  1575. return true;
  1576. }
  1577. state->parse_state = copy;
  1578. // Parameter pack expansion
  1579. if (ParseTwoCharToken(state, "sp") && ParseExpression(state)) {
  1580. return true;
  1581. }
  1582. state->parse_state = copy;
  1583. return ParseUnresolvedName(state);
  1584. }
  1585. // <expr-primary> ::= L <type> <(value) number> E
  1586. // ::= L <type> <(value) float> E
  1587. // ::= L <mangled-name> E
  1588. // // A bug in g++'s C++ ABI version 2 (-fabi-version=2).
  1589. // ::= LZ <encoding> E
  1590. //
  1591. // Warning, subtle: the "bug" LZ production above is ambiguous with the first
  1592. // production where <type> starts with <local-name>, which can lead to
  1593. // exponential backtracking in two scenarios:
  1594. //
  1595. // - When whatever follows the E in the <local-name> in the first production is
  1596. // not a name, we backtrack the whole <encoding> and re-parse the whole thing.
  1597. //
  1598. // - When whatever follows the <local-name> in the first production is not a
  1599. // number and this <expr-primary> may be followed by a name, we backtrack the
  1600. // <name> and re-parse it.
  1601. //
  1602. // Moreover this ambiguity isn't always resolved -- for example, the following
  1603. // has two different parses:
  1604. //
  1605. // _ZaaILZ4aoeuE1x1EvE
  1606. // => operator&&<aoeu, x, E, void>
  1607. // => operator&&<(aoeu::x)(1), void>
  1608. //
  1609. // To resolve this, we just do what GCC's demangler does, and refuse to parse
  1610. // casts to <local-name> types.
  1611. static bool ParseExprPrimary(State *state) {
  1612. ComplexityGuard guard(state);
  1613. if (guard.IsTooComplex()) return false;
  1614. ParseState copy = state->parse_state;
  1615. // The "LZ" special case: if we see LZ, we commit to accept "LZ <encoding> E"
  1616. // or fail, no backtracking.
  1617. if (ParseTwoCharToken(state, "LZ")) {
  1618. if (ParseEncoding(state) && ParseOneCharToken(state, 'E')) {
  1619. return true;
  1620. }
  1621. state->parse_state = copy;
  1622. return false;
  1623. }
  1624. // The merged cast production.
  1625. if (ParseOneCharToken(state, 'L') && ParseType(state) &&
  1626. ParseExprCastValue(state)) {
  1627. return true;
  1628. }
  1629. state->parse_state = copy;
  1630. if (ParseOneCharToken(state, 'L') && ParseMangledName(state) &&
  1631. ParseOneCharToken(state, 'E')) {
  1632. return true;
  1633. }
  1634. state->parse_state = copy;
  1635. return false;
  1636. }
  1637. // <number> or <float>, followed by 'E', as described above ParseExprPrimary.
  1638. static bool ParseExprCastValue(State *state) {
  1639. ComplexityGuard guard(state);
  1640. if (guard.IsTooComplex()) return false;
  1641. // We have to be able to backtrack after accepting a number because we could
  1642. // have e.g. "7fffE", which will accept "7" as a number but then fail to find
  1643. // the 'E'.
  1644. ParseState copy = state->parse_state;
  1645. if (ParseNumber(state, nullptr) && ParseOneCharToken(state, 'E')) {
  1646. return true;
  1647. }
  1648. state->parse_state = copy;
  1649. if (ParseFloatNumber(state) && ParseOneCharToken(state, 'E')) {
  1650. return true;
  1651. }
  1652. state->parse_state = copy;
  1653. return false;
  1654. }
  1655. // <local-name> ::= Z <(function) encoding> E <(entity) name> [<discriminator>]
  1656. // ::= Z <(function) encoding> E s [<discriminator>]
  1657. //
  1658. // Parsing a common prefix of these two productions together avoids an
  1659. // exponential blowup of backtracking. Parse like:
  1660. // <local-name> := Z <encoding> E <local-name-suffix>
  1661. // <local-name-suffix> ::= s [<discriminator>]
  1662. // ::= <name> [<discriminator>]
  1663. static bool ParseLocalNameSuffix(State *state) {
  1664. ComplexityGuard guard(state);
  1665. if (guard.IsTooComplex()) return false;
  1666. if (MaybeAppend(state, "::") && ParseName(state) &&
  1667. Optional(ParseDiscriminator(state))) {
  1668. return true;
  1669. }
  1670. // Since we're not going to overwrite the above "::" by re-parsing the
  1671. // <encoding> (whose trailing '\0' byte was in the byte now holding the
  1672. // first ':'), we have to rollback the "::" if the <name> parse failed.
  1673. if (state->parse_state.append) {
  1674. state->out[state->parse_state.out_cur_idx - 2] = '\0';
  1675. }
  1676. return ParseOneCharToken(state, 's') && Optional(ParseDiscriminator(state));
  1677. }
  1678. static bool ParseLocalName(State *state) {
  1679. ComplexityGuard guard(state);
  1680. if (guard.IsTooComplex()) return false;
  1681. ParseState copy = state->parse_state;
  1682. if (ParseOneCharToken(state, 'Z') && ParseEncoding(state) &&
  1683. ParseOneCharToken(state, 'E') && ParseLocalNameSuffix(state)) {
  1684. return true;
  1685. }
  1686. state->parse_state = copy;
  1687. return false;
  1688. }
  1689. // <discriminator> := _ <(non-negative) number>
  1690. static bool ParseDiscriminator(State *state) {
  1691. ComplexityGuard guard(state);
  1692. if (guard.IsTooComplex()) return false;
  1693. ParseState copy = state->parse_state;
  1694. if (ParseOneCharToken(state, '_') && ParseNumber(state, nullptr)) {
  1695. return true;
  1696. }
  1697. state->parse_state = copy;
  1698. return false;
  1699. }
  1700. // <substitution> ::= S_
  1701. // ::= S <seq-id> _
  1702. // ::= St, etc.
  1703. //
  1704. // "St" is special in that it's not valid as a standalone name, and it *is*
  1705. // allowed to precede a name without being wrapped in "N...E". This means that
  1706. // if we accept it on its own, we can accept "St1a" and try to parse
  1707. // template-args, then fail and backtrack, accept "St" on its own, then "1a" as
  1708. // an unqualified name and re-parse the same template-args. To block this
  1709. // exponential backtracking, we disable it with 'accept_std=false' in
  1710. // problematic contexts.
  1711. static bool ParseSubstitution(State *state, bool accept_std) {
  1712. ComplexityGuard guard(state);
  1713. if (guard.IsTooComplex()) return false;
  1714. if (ParseTwoCharToken(state, "S_")) {
  1715. MaybeAppend(state, "?"); // We don't support substitutions.
  1716. return true;
  1717. }
  1718. ParseState copy = state->parse_state;
  1719. if (ParseOneCharToken(state, 'S') && ParseSeqId(state) &&
  1720. ParseOneCharToken(state, '_')) {
  1721. MaybeAppend(state, "?"); // We don't support substitutions.
  1722. return true;
  1723. }
  1724. state->parse_state = copy;
  1725. // Expand abbreviations like "St" => "std".
  1726. if (ParseOneCharToken(state, 'S')) {
  1727. const AbbrevPair *p;
  1728. for (p = kSubstitutionList; p->abbrev != nullptr; ++p) {
  1729. if (RemainingInput(state)[0] == p->abbrev[1] &&
  1730. (accept_std || p->abbrev[1] != 't')) {
  1731. MaybeAppend(state, "std");
  1732. if (p->real_name[0] != '\0') {
  1733. MaybeAppend(state, "::");
  1734. MaybeAppend(state, p->real_name);
  1735. }
  1736. ++state->parse_state.mangled_idx;
  1737. return true;
  1738. }
  1739. }
  1740. }
  1741. state->parse_state = copy;
  1742. return false;
  1743. }
  1744. // Parse <mangled-name>, optionally followed by either a function-clone suffix
  1745. // or version suffix. Returns true only if all of "mangled_cur" was consumed.
  1746. static bool ParseTopLevelMangledName(State *state) {
  1747. ComplexityGuard guard(state);
  1748. if (guard.IsTooComplex()) return false;
  1749. if (ParseMangledName(state)) {
  1750. if (RemainingInput(state)[0] != '\0') {
  1751. // Drop trailing function clone suffix, if any.
  1752. if (IsFunctionCloneSuffix(RemainingInput(state))) {
  1753. return true;
  1754. }
  1755. // Append trailing version suffix if any.
  1756. // ex. _Z3foo@@GLIBCXX_3.4
  1757. if (RemainingInput(state)[0] == '@') {
  1758. MaybeAppend(state, RemainingInput(state));
  1759. return true;
  1760. }
  1761. return false; // Unconsumed suffix.
  1762. }
  1763. return true;
  1764. }
  1765. return false;
  1766. }
  1767. static bool Overflowed(const State *state) {
  1768. return state->parse_state.out_cur_idx >= state->out_end_idx;
  1769. }
  1770. // The demangler entry point.
  1771. bool Demangle(const char *mangled, char *out, int out_size) {
  1772. State state;
  1773. InitState(&state, mangled, out, out_size);
  1774. return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
  1775. state.parse_state.out_cur_idx > 0;
  1776. }
  1777. } // namespace debugging_internal
  1778. ABSL_NAMESPACE_END
  1779. } // namespace absl