protoc-gen-upb.cc 52 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498
  1. // Copyright (c) 2009-2021, Google LLC
  2. // All rights reserved.
  3. //
  4. // Redistribution and use in source and binary forms, with or without
  5. // modification, are permitted provided that the following conditions are met:
  6. // * Redistributions of source code must retain the above copyright
  7. // notice, this list of conditions and the following disclaimer.
  8. // * Redistributions in binary form must reproduce the above copyright
  9. // notice, this list of conditions and the following disclaimer in the
  10. // documentation and/or other materials provided with the distribution.
  11. // * Neither the name of Google LLC nor the
  12. // names of its contributors may be used to endorse or promote products
  13. // derived from this software without specific prior written permission.
  14. //
  15. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18. // ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
  19. // INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  20. // (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  21. // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  22. // ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  24. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <cstdint>
#include <limits>
#include <memory>

#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
#include "absl/strings/ascii.h"
#include "absl/strings/substitute.h"
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/compiler/plugin.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "google/protobuf/wire_format.h"
#include "upbc/common.h"
#include "upbc/message_layout.h"
  37. namespace upbc {
  38. namespace {
  39. namespace protoc = ::google::protobuf::compiler;
  40. namespace protobuf = ::google::protobuf;
  41. std::string SourceFilename(const google::protobuf::FileDescriptor* file) {
  42. return StripExtension(file->name()) + ".upb.c";
  43. }
  44. std::string MessageInit(const protobuf::Descriptor* descriptor) {
  45. return MessageName(descriptor) + "_msginit";
  46. }
  47. std::string EnumInit(const protobuf::EnumDescriptor* descriptor) {
  48. return ToCIdent(descriptor->full_name()) + "_enuminit";
  49. }
  50. std::string ExtensionIdentBase(const protobuf::FieldDescriptor* ext) {
  51. assert(ext->is_extension());
  52. std::string ext_scope;
  53. if (ext->extension_scope()) {
  54. return MessageName(ext->extension_scope());
  55. } else {
  56. return ToCIdent(ext->file()->package());
  57. }
  58. }
  59. std::string ExtensionLayout(const google::protobuf::FieldDescriptor* ext) {
  60. return absl::StrCat(ExtensionIdentBase(ext), "_", ext->name(), "_ext");
  61. }
// Fixed identifiers for the enum, extension and message layout tables.
// NOTE(review): their use sites are outside this excerpt; presumably these are
// the names of the per-file arrays emitted into the generated source - confirm
// at the use sites.
const char* kEnumsInit = "enums_layout";
const char* kExtensionsInit = "extensions_layout";
const char* kMessagesInit = "messages_layout";
  65. void AddEnums(const protobuf::Descriptor* message,
  66. std::vector<const protobuf::EnumDescriptor*>* enums) {
  67. for (int i = 0; i < message->enum_type_count(); i++) {
  68. enums->push_back(message->enum_type(i));
  69. }
  70. for (int i = 0; i < message->nested_type_count(); i++) {
  71. AddEnums(message->nested_type(i), enums);
  72. }
  73. }
  74. std::vector<const protobuf::EnumDescriptor*> SortedEnums(
  75. const protobuf::FileDescriptor* file) {
  76. std::vector<const protobuf::EnumDescriptor*> enums;
  77. for (int i = 0; i < file->enum_type_count(); i++) {
  78. enums.push_back(file->enum_type(i));
  79. }
  80. for (int i = 0; i < file->message_type_count(); i++) {
  81. AddEnums(file->message_type(i), &enums);
  82. }
  83. return enums;
  84. }
  85. void AddMessages(const protobuf::Descriptor* message,
  86. std::vector<const protobuf::Descriptor*>* messages) {
  87. messages->push_back(message);
  88. for (int i = 0; i < message->nested_type_count(); i++) {
  89. AddMessages(message->nested_type(i), messages);
  90. }
  91. }
  92. // Ordering must match upb/def.c!
  93. //
  94. // The ordering is significant because each upb_MessageDef* will point at the
  95. // corresponding upb_MiniTable and we just iterate through the list without
  96. // any search or lookup.
  97. std::vector<const protobuf::Descriptor*> SortedMessages(
  98. const protobuf::FileDescriptor* file) {
  99. std::vector<const protobuf::Descriptor*> messages;
  100. for (int i = 0; i < file->message_type_count(); i++) {
  101. AddMessages(file->message_type(i), &messages);
  102. }
  103. return messages;
  104. }
  105. void AddExtensionsFromMessage(
  106. const protobuf::Descriptor* message,
  107. std::vector<const protobuf::FieldDescriptor*>* exts) {
  108. for (int i = 0; i < message->extension_count(); i++) {
  109. exts->push_back(message->extension(i));
  110. }
  111. for (int i = 0; i < message->nested_type_count(); i++) {
  112. AddExtensionsFromMessage(message->nested_type(i), exts);
  113. }
  114. }
  115. // Ordering must match upb/def.c!
  116. //
  117. // The ordering is significant because each upb_FieldDef* will point at the
  118. // corresponding upb_MiniTable_Extension and we just iterate through the list
  119. // without any search or lookup.
  120. std::vector<const protobuf::FieldDescriptor*> SortedExtensions(
  121. const protobuf::FileDescriptor* file) {
  122. std::vector<const protobuf::FieldDescriptor*> ret;
  123. for (int i = 0; i < file->extension_count(); i++) {
  124. ret.push_back(file->extension(i));
  125. }
  126. for (int i = 0; i < file->message_type_count(); i++) {
  127. AddExtensionsFromMessage(file->message_type(i), &ret);
  128. }
  129. return ret;
  130. }
  131. std::vector<const protobuf::FieldDescriptor*> FieldNumberOrder(
  132. const protobuf::Descriptor* message) {
  133. std::vector<const protobuf::FieldDescriptor*> fields;
  134. for (int i = 0; i < message->field_count(); i++) {
  135. fields.push_back(message->field(i));
  136. }
  137. std::sort(fields.begin(), fields.end(),
  138. [](const protobuf::FieldDescriptor* a,
  139. const protobuf::FieldDescriptor* b) {
  140. return a->number() < b->number();
  141. });
  142. return fields;
  143. }
  144. std::vector<const protobuf::FieldDescriptor*> SortedSubmessages(
  145. const protobuf::Descriptor* message) {
  146. std::vector<const protobuf::FieldDescriptor*> ret;
  147. for (int i = 0; i < message->field_count(); i++) {
  148. if (message->field(i)->cpp_type() ==
  149. protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
  150. ret.push_back(message->field(i));
  151. }
  152. }
  153. std::sort(ret.begin(), ret.end(),
  154. [](const protobuf::FieldDescriptor* a,
  155. const protobuf::FieldDescriptor* b) {
  156. return a->message_type()->full_name() <
  157. b->message_type()->full_name();
  158. });
  159. return ret;
  160. }
  161. std::vector<const protobuf::FieldDescriptor*> SortedSubEnums(
  162. const protobuf::Descriptor* message) {
  163. std::vector<const protobuf::FieldDescriptor*> ret;
  164. for (int i = 0; i < message->field_count(); i++) {
  165. if (message->field(i)->cpp_type() ==
  166. protobuf::FieldDescriptor::CPPTYPE_ENUM) {
  167. ret.push_back(message->field(i));
  168. }
  169. }
  170. std::sort(ret.begin(), ret.end(),
  171. [](const protobuf::FieldDescriptor* a,
  172. const protobuf::FieldDescriptor* b) {
  173. return a->enum_type()->full_name() < b->enum_type()->full_name();
  174. });
  175. return ret;
  176. }
  177. std::string EnumValueSymbol(const protobuf::EnumValueDescriptor* value) {
  178. return ToCIdent(value->full_name());
  179. }
  180. std::string GetSizeInit(const MessageLayout::Size& size) {
  181. return absl::Substitute("UPB_SIZE($0, $1)", size.size32, size.size64);
  182. }
  183. std::string CTypeInternal(const protobuf::FieldDescriptor* field,
  184. bool is_const) {
  185. std::string maybe_const = is_const ? "const " : "";
  186. switch (field->cpp_type()) {
  187. case protobuf::FieldDescriptor::CPPTYPE_MESSAGE: {
  188. std::string maybe_struct =
  189. field->file() != field->message_type()->file() ? "struct " : "";
  190. return maybe_const + maybe_struct + MessageName(field->message_type()) +
  191. "*";
  192. }
  193. case protobuf::FieldDescriptor::CPPTYPE_BOOL:
  194. return "bool";
  195. case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
  196. return "float";
  197. case protobuf::FieldDescriptor::CPPTYPE_INT32:
  198. case protobuf::FieldDescriptor::CPPTYPE_ENUM:
  199. return "int32_t";
  200. case protobuf::FieldDescriptor::CPPTYPE_UINT32:
  201. return "uint32_t";
  202. case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
  203. return "double";
  204. case protobuf::FieldDescriptor::CPPTYPE_INT64:
  205. return "int64_t";
  206. case protobuf::FieldDescriptor::CPPTYPE_UINT64:
  207. return "uint64_t";
  208. case protobuf::FieldDescriptor::CPPTYPE_STRING:
  209. return "upb_StringView";
  210. default:
  211. fprintf(stderr, "Unexpected type");
  212. abort();
  213. }
  214. }
  215. std::string SizeLg2(const protobuf::FieldDescriptor* field) {
  216. switch (field->cpp_type()) {
  217. case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
  218. return "UPB_SIZE(2, 3)";
  219. case protobuf::FieldDescriptor::CPPTYPE_ENUM:
  220. return std::to_string(2);
  221. case protobuf::FieldDescriptor::CPPTYPE_BOOL:
  222. return std::to_string(1);
  223. case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
  224. return std::to_string(2);
  225. case protobuf::FieldDescriptor::CPPTYPE_INT32:
  226. return std::to_string(2);
  227. case protobuf::FieldDescriptor::CPPTYPE_UINT32:
  228. return std::to_string(2);
  229. case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
  230. return std::to_string(3);
  231. case protobuf::FieldDescriptor::CPPTYPE_INT64:
  232. return std::to_string(3);
  233. case protobuf::FieldDescriptor::CPPTYPE_UINT64:
  234. return std::to_string(3);
  235. case protobuf::FieldDescriptor::CPPTYPE_STRING:
  236. return "UPB_SIZE(3, 4)";
  237. default:
  238. fprintf(stderr, "Unexpected type");
  239. abort();
  240. }
  241. }
  242. std::string SizeRep(const protobuf::FieldDescriptor* field) {
  243. switch (field->cpp_type()) {
  244. case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
  245. return "upb_FieldRep_Pointer";
  246. case protobuf::FieldDescriptor::CPPTYPE_ENUM:
  247. case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
  248. case protobuf::FieldDescriptor::CPPTYPE_INT32:
  249. case protobuf::FieldDescriptor::CPPTYPE_UINT32:
  250. return "upb_FieldRep_4Byte";
  251. case protobuf::FieldDescriptor::CPPTYPE_BOOL:
  252. return "upb_FieldRep_1Byte";
  253. case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
  254. case protobuf::FieldDescriptor::CPPTYPE_INT64:
  255. case protobuf::FieldDescriptor::CPPTYPE_UINT64:
  256. return "upb_FieldRep_8Byte";
  257. case protobuf::FieldDescriptor::CPPTYPE_STRING:
  258. return "upb_FieldRep_StringView";
  259. default:
  260. fprintf(stderr, "Unexpected type");
  261. abort();
  262. }
  263. }
  264. bool HasNonZeroDefault(const protobuf::FieldDescriptor* field) {
  265. switch (field->cpp_type()) {
  266. case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
  267. return false;
  268. case protobuf::FieldDescriptor::CPPTYPE_STRING:
  269. return !field->default_value_string().empty();
  270. case protobuf::FieldDescriptor::CPPTYPE_INT32:
  271. return field->default_value_int32() != 0;
  272. case protobuf::FieldDescriptor::CPPTYPE_INT64:
  273. return field->default_value_int64() != 0;
  274. case protobuf::FieldDescriptor::CPPTYPE_UINT32:
  275. return field->default_value_uint32() != 0;
  276. case protobuf::FieldDescriptor::CPPTYPE_UINT64:
  277. return field->default_value_uint64() != 0;
  278. case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
  279. return field->default_value_float() != 0;
  280. case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
  281. return field->default_value_double() != 0;
  282. case protobuf::FieldDescriptor::CPPTYPE_BOOL:
  283. return field->default_value_bool() != false;
  284. case protobuf::FieldDescriptor::CPPTYPE_ENUM:
  285. // Use a number instead of a symbolic name so that we don't require
  286. // this enum's header to be included.
  287. return field->default_value_enum()->number() != 0;
  288. }
  289. ABSL_ASSERT(false);
  290. return "XXX";
  291. }
  292. std::string FieldDefault(const protobuf::FieldDescriptor* field) {
  293. switch (field->cpp_type()) {
  294. case protobuf::FieldDescriptor::CPPTYPE_MESSAGE:
  295. return "NULL";
  296. case protobuf::FieldDescriptor::CPPTYPE_STRING:
  297. return absl::Substitute("upb_StringView_FromString(\"$0\")",
  298. absl::CEscape(field->default_value_string()));
  299. case protobuf::FieldDescriptor::CPPTYPE_INT32:
  300. return absl::StrCat(field->default_value_int32());
  301. case protobuf::FieldDescriptor::CPPTYPE_INT64:
  302. return absl::StrCat(field->default_value_int64());
  303. case protobuf::FieldDescriptor::CPPTYPE_UINT32:
  304. return absl::StrCat(field->default_value_uint32());
  305. case protobuf::FieldDescriptor::CPPTYPE_UINT64:
  306. return absl::StrCat(field->default_value_uint64());
  307. case protobuf::FieldDescriptor::CPPTYPE_FLOAT:
  308. return absl::StrCat(field->default_value_float());
  309. case protobuf::FieldDescriptor::CPPTYPE_DOUBLE:
  310. return absl::StrCat(field->default_value_double());
  311. case protobuf::FieldDescriptor::CPPTYPE_BOOL:
  312. return field->default_value_bool() ? "true" : "false";
  313. case protobuf::FieldDescriptor::CPPTYPE_ENUM:
  314. // Use a number instead of a symbolic name so that we don't require
  315. // this enum's header to be included.
  316. return absl::StrCat(field->default_value_enum()->number());
  317. }
  318. ABSL_ASSERT(false);
  319. return "XXX";
  320. }
  321. std::string CType(const protobuf::FieldDescriptor* field) {
  322. return CTypeInternal(field, false);
  323. }
  324. std::string CTypeConst(const protobuf::FieldDescriptor* field) {
  325. return CTypeInternal(field, true);
  326. }
  327. void DumpEnumValues(const protobuf::EnumDescriptor* desc, Output& output) {
  328. std::vector<const protobuf::EnumValueDescriptor*> values;
  329. for (int i = 0; i < desc->value_count(); i++) {
  330. values.push_back(desc->value(i));
  331. }
  332. std::sort(values.begin(), values.end(),
  333. [](const protobuf::EnumValueDescriptor* a,
  334. const protobuf::EnumValueDescriptor* b) {
  335. return a->number() < b->number();
  336. });
  337. for (size_t i = 0; i < values.size(); i++) {
  338. auto value = values[i];
  339. output(" $0 = $1", EnumValueSymbol(value), value->number());
  340. if (i != values.size() - 1) {
  341. output(",");
  342. }
  343. output("\n");
  344. }
  345. }
  346. void GenerateExtensionInHeader(const protobuf::FieldDescriptor* ext,
  347. Output& output) {
  348. output(
  349. "UPB_INLINE bool $0_has_$1(const struct $2 *msg) { "
  350. "return _upb_Message_Getext(msg, &$3) != NULL; }\n",
  351. ExtensionIdentBase(ext), ext->name(), MessageName(ext->containing_type()),
  352. ExtensionLayout(ext));
  353. if (ext->is_repeated()) {
  354. } else if (ext->message_type()) {
  355. output(
  356. "UPB_INLINE $0 $1_$2(const struct $3 *msg) { "
  357. "const upb_Message_Extension *ext = _upb_Message_Getext(msg, &$4); "
  358. "UPB_ASSERT(ext); return *UPB_PTR_AT(&ext->data, 0, $0); }\n",
  359. CTypeConst(ext), ExtensionIdentBase(ext), ext->name(),
  360. MessageName(ext->containing_type()), ExtensionLayout(ext),
  361. FieldDefault(ext));
  362. } else {
  363. output(
  364. "UPB_INLINE $0 $1_$2(const struct $3 *msg) { "
  365. "const upb_Message_Extension *ext = _upb_Message_Getext(msg, &$4); "
  366. "return ext ? *UPB_PTR_AT(&ext->data, 0, $0) : $5; }\n",
  367. CTypeConst(ext), ExtensionIdentBase(ext), ext->name(),
  368. MessageName(ext->containing_type()), ExtensionLayout(ext),
  369. FieldDefault(ext));
  370. }
  371. }
// Emits the header-file API for `message` into `output`.
//
// In order, this writes:
//   1. A comment with the message's full name.
//   2. For non-map-entry messages: the _new, _parse, _parse_ex, _serialize and
//      _serialize_ex inline functions.
//   3. For each real oneof: a `..._oneofcases` enum and a `_case` accessor.
//   4. For each field, in field-number order: an optional hazzer and a getter
//      (the "const methods").
//   5. For each field, in field-number order: the mutators.
//
// Offsets and hasbit indexes come from a MessageLayout computed for `message`.
void GenerateMessageInHeader(const protobuf::Descriptor* message,
                             Output& output) {
  MessageLayout layout(message);
  output("/* $0 */\n\n", message->full_name());
  std::string msg_name = ToCIdent(message->full_name());
  // Map-entry messages do not get constructor/parse/serialize functions.
  if (!message->options().map_entry()) {
    // $0 is the message's C name, $1 its layout-table symbol.
    output(
        R"cc(
UPB_INLINE $0* $0_new(upb_Arena* arena) {
return ($0*)_upb_Message_New(&$1, arena);
}
UPB_INLINE $0* $0_parse(const char* buf, size_t size, upb_Arena* arena) {
$0* ret = $0_new(arena);
if (!ret) return NULL;
if (upb_Decode(buf, size, ret, &$1, NULL, 0, arena) != kUpb_DecodeStatus_Ok) {
return NULL;
}
return ret;
}
UPB_INLINE $0* $0_parse_ex(const char* buf, size_t size,
const upb_ExtensionRegistry* extreg,
int options, upb_Arena* arena) {
$0* ret = $0_new(arena);
if (!ret) return NULL;
if (upb_Decode(buf, size, ret, &$1, extreg, options, arena) !=
kUpb_DecodeStatus_Ok) {
return NULL;
}
return ret;
}
UPB_INLINE char* $0_serialize(const $0* msg, upb_Arena* arena, size_t* len) {
return upb_Encode(msg, &$1, 0, arena, len);
}
UPB_INLINE char* $0_serialize_ex(const $0* msg, int options,
upb_Arena* arena, size_t* len) {
return upb_Encode(msg, &$1, options, arena, len);
}
)cc",
        MessageName(message), MessageInit(message));
  }
  // One enum per real oneof: an enumerator per member field whose value is
  // the field number, plus a NOT_SET enumerator with value 0.  The _case
  // accessor reads an int32_t at the oneof's case offset.
  for (int i = 0; i < message->real_oneof_decl_count(); i++) {
    const protobuf::OneofDescriptor* oneof = message->oneof_decl(i);
    std::string fullname = ToCIdent(oneof->full_name());
    output("typedef enum {\n");
    for (int j = 0; j < oneof->field_count(); j++) {
      const protobuf::FieldDescriptor* field = oneof->field(j);
      output(" $0_$1 = $2,\n", fullname, field->name(), field->number());
    }
    output(
        " $0_NOT_SET = 0\n"
        "} $0_oneofcases;\n",
        fullname);
    output(
        "UPB_INLINE $0_oneofcases $1_$2_case(const $1* msg) { "
        "return ($0_oneofcases)*UPB_PTR_AT(msg, $3, int32_t); }\n"
        "\n",
        fullname, msg_name, oneof->name(),
        GetSizeInit(layout.GetOneofCaseOffset(oneof)));
  }
  // Generate const methods.
  for (auto field : FieldNumberOrder(message)) {
    // Generate hazzer (if any).
    // Three sources of presence are tried in order: a hasbit, the oneof case,
    // and (for message fields without either) the submessage pointer itself.
    // Fields matching none of these get no hazzer.
    if (layout.HasHasbit(field)) {
      output(
          "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
          "return _upb_hasbit(msg, $2); }\n",
          msg_name, field->name(), layout.GetHasbitIndex(field));
    } else if (field->real_containing_oneof()) {
      output(
          "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
          "return _upb_getoneofcase(msg, $2) == $3; }\n",
          msg_name, field->name(),
          GetSizeInit(
              layout.GetOneofCaseOffset(field->real_containing_oneof())),
          field->number());
    } else if (field->message_type()) {
      output(
          "UPB_INLINE bool $0_has_$1(const $0 *msg) { "
          "return _upb_has_submsg_nohasbit(msg, $2); }\n",
          msg_name, field->name(), GetSizeInit(layout.GetFieldOffset(field)));
    }
    // Generate getter.
    if (field->is_map()) {
      // Map fields: emit _size, _get and _next.  The key and value fields are
      // looked up in the map-entry message as field numbers 1 and 2.
      const protobuf::Descriptor* entry = field->message_type();
      const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
      const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
      output(
          "UPB_INLINE size_t $0_$1_size(const $0 *msg) {"
          "return _upb_msg_map_size(msg, $2); }\n",
          msg_name, field->name(), GetSizeInit(layout.GetFieldOffset(field)));
      // The key/value size arguments are the literal 0 for string-typed keys
      // or values and sizeof(...) otherwise.
      output(
          "UPB_INLINE bool $0_$1_get(const $0 *msg, $2 key, $3 *val) { "
          "return _upb_msg_map_get(msg, $4, &key, $5, val, $6); }\n",
          msg_name, field->name(), CType(key), CType(val),
          GetSizeInit(layout.GetFieldOffset(field)),
          key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(key)",
          val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(*val)");
      output(
          "UPB_INLINE $0 $1_$2_next(const $1 *msg, size_t* iter) { "
          "return ($0)_upb_msg_map_next(msg, $3, iter); }\n",
          CTypeConst(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)));
    } else if (message->options().map_entry()) {
      // Fields of a map-entry message: the getter calls _upb_msg_map_<field
      // name>, again passing 0 as the size for string-typed fields.
      output(
          "UPB_INLINE $0 $1_$2(const $1 *msg) {\n"
          " $3 ret;\n"
          " _upb_msg_map_$2(msg, &ret, $4);\n"
          " return ret;\n"
          "}\n",
          CTypeConst(field), msg_name, field->name(), CType(field),
          field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(ret)");
    } else if (field->is_repeated()) {
      // Repeated fields: the getter returns a pointer to the elements and
      // takes a `len` out-parameter.
      output(
          "UPB_INLINE $0 const* $1_$2(const $1 *msg, size_t *len) { "
          "return ($0 const*)_upb_array_accessor(msg, $3, len); }\n",
          CTypeConst(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)));
    } else if (field->real_containing_oneof()) {
      // Oneof members: read through UPB_READ_ONEOF, which is given the field
      // offset, the case offset, the field number and the default value.
      output(
          "UPB_INLINE $0 $1_$2(const $1 *msg) { "
          "return UPB_READ_ONEOF(msg, $0, $3, $4, $5, $6); }\n",
          CTypeConst(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)),
          GetSizeInit(
              layout.GetOneofCaseOffset(field->real_containing_oneof())),
          field->number(), FieldDefault(field));
    } else {
      if (HasNonZeroDefault(field)) {
        // The emitted getter consults the field's hazzer and returns the
        // declared default when the field is not set.
        output(
            R"cc(
UPB_INLINE $0 $1_$2(const $1* msg) {
return $1_has_$2(msg) ? *UPB_PTR_AT(msg, $3, $0) : $4;
}
)cc",
            CTypeConst(field), msg_name, field->name(),
            GetSizeInit(layout.GetFieldOffset(field)), FieldDefault(field));
      } else {
        // Zero default: the emitted getter reads the stored value directly.
        output(
            R"cc(
UPB_INLINE $0 $1_$2(const $1* msg) {
return *UPB_PTR_AT(msg, $3, $0);
}
)cc",
            CTypeConst(field), msg_name, field->name(),
            GetSizeInit(layout.GetFieldOffset(field)));
      }
    }
  }
  output("\n");
  // Generate mutable methods.
  for (auto field : FieldNumberOrder(message)) {
    if (field->is_map()) {
      // TODO(haberman): add map-based mutators.
      // Currently emitted for map fields: _clear, _set, _delete and
      // _nextmutable.
      const protobuf::Descriptor* entry = field->message_type();
      const protobuf::FieldDescriptor* key = entry->FindFieldByNumber(1);
      const protobuf::FieldDescriptor* val = entry->FindFieldByNumber(2);
      output(
          "UPB_INLINE void $0_$1_clear($0 *msg) { _upb_msg_map_clear(msg, $2); "
          "}\n",
          msg_name, field->name(), GetSizeInit(layout.GetFieldOffset(field)));
      output(
          "UPB_INLINE bool $0_$1_set($0 *msg, $2 key, $3 val, upb_Arena *a) { "
          "return _upb_msg_map_set(msg, $4, &key, $5, &val, $6, a); }\n",
          msg_name, field->name(), CType(key), CType(val),
          GetSizeInit(layout.GetFieldOffset(field)),
          key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(key)",
          val->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(val)");
      output(
          "UPB_INLINE bool $0_$1_delete($0 *msg, $2 key) { "
          "return _upb_msg_map_delete(msg, $3, &key, $4); }\n",
          msg_name, field->name(), CType(key),
          GetSizeInit(layout.GetFieldOffset(field)),
          key->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
              ? "0"
              : "sizeof(key)");
      output(
          "UPB_INLINE $0 $1_$2_nextmutable($1 *msg, size_t* iter) { "
          "return ($0)_upb_msg_map_next(msg, $3, iter); }\n",
          CType(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)));
    } else if (field->is_repeated()) {
      // Repeated fields: emit _mutable_, _resize_ and _add_.  SizeLg2(field)
      // supplies the element-size argument of the resize/append helpers.
      output(
          "UPB_INLINE $0* $1_mutable_$2($1 *msg, size_t *len) {\n"
          " return ($0*)_upb_array_mutable_accessor(msg, $3, len);\n"
          "}\n",
          CType(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)));
      output(
          "UPB_INLINE $0* $1_resize_$2($1 *msg, size_t len, "
          "upb_Arena *arena) {\n"
          " return ($0*)_upb_Array_Resize_accessor2(msg, $3, len, $4, "
          "arena);\n"
          "}\n",
          CType(field), msg_name, field->name(),
          GetSizeInit(layout.GetFieldOffset(field)), SizeLg2(field));
      if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
        // For message elements, _add_ allocates a new submessage on the
        // arena, appends it and returns it (NULL if the append fails).
        output(
            "UPB_INLINE struct $0* $1_add_$2($1 *msg, upb_Arena *arena) {\n"
            " struct $0* sub = (struct $0*)_upb_Message_New(&$3, arena);\n"
            " bool ok = _upb_Array_Append_accessor2(\n"
            " msg, $4, $5, &sub, arena);\n"
            " if (!ok) return NULL;\n"
            " return sub;\n"
            "}\n",
            MessageName(field->message_type()), msg_name, field->name(),
            MessageInit(field->message_type()),
            GetSizeInit(layout.GetFieldOffset(field)), SizeLg2(field));
      } else {
        // For other element types, _add_ appends the given value and returns
        // the helper's bool result.
        output(
            "UPB_INLINE bool $1_add_$2($1 *msg, $0 val, upb_Arena *arena) {\n"
            " return _upb_Array_Append_accessor2(msg, $3, $4, &val,\n"
            " arena);\n"
            "}\n",
            CType(field), msg_name, field->name(),
            GetSizeInit(layout.GetFieldOffset(field)), SizeLg2(field));
      }
    } else {
      // Non-repeated field.
      if (message->options().map_entry() && field->name() == "key") {
        // Key cannot be mutated.
        continue;
      }
      // The common function signature for all setters. Varying implementations
      // follow.
      output("UPB_INLINE void $0_set_$1($0 *msg, $2 value) {\n", msg_name,
             field->name(), CType(field));
      if (message->options().map_entry()) {
        // Map-entry value setter: size argument is 0 for string-typed fields.
        output(
            " _upb_msg_map_set_value(msg, &value, $0);\n"
            "}\n",
            field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_STRING
                ? "0"
                : "sizeof(" + CType(field) + ")");
      } else if (field->real_containing_oneof()) {
        // Oneof member setter: written through UPB_WRITE_ONEOF, which is given
        // the value offset, the case offset and the field number.
        output(
            " UPB_WRITE_ONEOF(msg, $0, $1, value, $2, $3);\n"
            "}\n",
            CType(field), GetSizeInit(layout.GetFieldOffset(field)),
            GetSizeInit(
                layout.GetOneofCaseOffset(field->real_containing_oneof())),
            field->number());
      } else {
        // Plain field setter: set the hasbit first (if the field has one),
        // then store the value at the field's offset.
        if (MessageLayout::HasHasbit(field)) {
          output(" _upb_sethas(msg, $0);\n", layout.GetHasbitIndex(field));
        }
        output(
            " *UPB_PTR_AT(msg, $1, $0) = value;\n"
            "}\n",
            CType(field), GetSizeInit(layout.GetFieldOffset(field)));
      }
      // Singular message fields outside map entries also get a _mutable_
      // accessor: it returns the existing submessage or, if the getter
      // returned NULL, creates one on the arena and stores it via the setter.
      if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
          !message->options().map_entry()) {
        output(
            "UPB_INLINE struct $0* $1_mutable_$2($1 *msg, upb_Arena *arena) {\n"
            " struct $0* sub = (struct $0*)$1_$2(msg);\n"
            " if (sub == NULL) {\n"
            " sub = (struct $0*)_upb_Message_New(&$3, arena);\n"
            " if (!sub) return NULL;\n"
            " $1_set_$2(msg, sub);\n"
            " }\n"
            " return sub;\n"
            "}\n",
            MessageName(field->message_type()), msg_name, field->name(),
            MessageInit(field->message_type()));
      }
    }
  }
  output("\n");
}
  651. void WriteHeader(const protobuf::FileDescriptor* file, Output& output) {
  652. EmitFileWarning(file, output);
  653. output(
  654. "#ifndef $0_UPB_H_\n"
  655. "#define $0_UPB_H_\n\n"
  656. "#include \"upb/msg_internal.h\"\n"
  657. "#include \"upb/decode.h\"\n"
  658. "#include \"upb/decode_fast.h\"\n"
  659. "#include \"upb/encode.h\"\n\n",
  660. ToPreproc(file->name()));
  661. for (int i = 0; i < file->public_dependency_count(); i++) {
  662. if (i == 0) {
  663. output("/* Public Imports. */\n");
  664. }
  665. output("#include \"$0\"\n", HeaderFilename(file));
  666. if (i == file->public_dependency_count() - 1) {
  667. output("\n");
  668. }
  669. }
  670. output(
  671. "#include \"upb/port_def.inc\"\n"
  672. "\n"
  673. "#ifdef __cplusplus\n"
  674. "extern \"C\" {\n"
  675. "#endif\n"
  676. "\n");
  677. const std::vector<const protobuf::Descriptor*> this_file_messages =
  678. SortedMessages(file);
  679. const std::vector<const protobuf::FieldDescriptor*> this_file_exts =
  680. SortedExtensions(file);
  681. // Forward-declare types defined in this file.
  682. for (auto message : this_file_messages) {
  683. output("struct $0;\n", ToCIdent(message->full_name()));
  684. }
  685. for (auto message : this_file_messages) {
  686. output("typedef struct $0 $0;\n", ToCIdent(message->full_name()));
  687. }
  688. for (auto message : this_file_messages) {
  689. output("extern const upb_MiniTable $0;\n", MessageInit(message));
  690. }
  691. for (auto ext : this_file_exts) {
  692. output("extern const upb_MiniTable_Extension $0;\n", ExtensionLayout(ext));
  693. }
  694. // Forward-declare types not in this file, but used as submessages.
  695. // Order by full name for consistent ordering.
  696. std::map<std::string, const protobuf::Descriptor*> forward_messages;
  697. for (auto* message : this_file_messages) {
  698. for (int i = 0; i < message->field_count(); i++) {
  699. const protobuf::FieldDescriptor* field = message->field(i);
  700. if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE &&
  701. field->file() != field->message_type()->file()) {
  702. forward_messages[field->message_type()->full_name()] =
  703. field->message_type();
  704. }
  705. }
  706. }
  707. for (auto ext : this_file_exts) {
  708. if (ext->file() != ext->containing_type()->file()) {
  709. forward_messages[ext->containing_type()->full_name()] =
  710. ext->containing_type();
  711. }
  712. }
  713. for (const auto& pair : forward_messages) {
  714. output("struct $0;\n", MessageName(pair.second));
  715. }
  716. for (const auto& pair : forward_messages) {
  717. output("extern const upb_MiniTable $0;\n", MessageInit(pair.second));
  718. }
  719. if (!this_file_messages.empty()) {
  720. output("\n");
  721. }
  722. std::vector<const protobuf::EnumDescriptor*> this_file_enums =
  723. SortedEnums(file);
  724. std::sort(
  725. this_file_enums.begin(), this_file_enums.end(),
  726. [](const protobuf::EnumDescriptor* a, const protobuf::EnumDescriptor* b) {
  727. return a->full_name() < b->full_name();
  728. });
  729. for (auto enumdesc : this_file_enums) {
  730. output("typedef enum {\n");
  731. DumpEnumValues(enumdesc, output);
  732. output("} $0;\n\n", ToCIdent(enumdesc->full_name()));
  733. }
  734. output("\n");
  735. if (file->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
  736. for (const auto* enumdesc : this_file_enums) {
  737. output("extern const upb_MiniTable_Enum $0;\n", EnumInit(enumdesc));
  738. }
  739. }
  740. output("\n");
  741. for (auto message : this_file_messages) {
  742. GenerateMessageInHeader(message, output);
  743. }
  744. for (auto ext : this_file_exts) {
  745. GenerateExtensionInHeader(ext, output);
  746. }
  747. output("extern const upb_MiniTable_File $0;\n\n", FileLayoutName(file));
  748. if (file->name() ==
  749. protobuf::FileDescriptorProto::descriptor()->file()->name()) {
  750. // This is gratuitously inefficient with how many times it rebuilds
  751. // MessageLayout objects for the same message. But we only do this for one
  752. // proto (descriptor.proto) so we don't worry about it.
  753. const protobuf::Descriptor* max32 = nullptr;
  754. const protobuf::Descriptor* max64 = nullptr;
  755. for (const auto* message : this_file_messages) {
  756. if (absl::EndsWith(message->name(), "Options")) {
  757. MessageLayout layout(message);
  758. if (max32 == nullptr) {
  759. max32 = message;
  760. max64 = message;
  761. } else {
  762. if (layout.message_size().size32 >
  763. MessageLayout(max32).message_size().size32) {
  764. max32 = message;
  765. }
  766. if (layout.message_size().size64 >
  767. MessageLayout(max64).message_size().size64) {
  768. max64 = message;
  769. }
  770. }
  771. }
  772. }
  773. output("/* Max size 32 is $0 */\n", max32->full_name());
  774. output("/* Max size 64 is $0 */\n", max64->full_name());
  775. MessageLayout::Size size;
  776. size.size32 = MessageLayout(max32).message_size().size32;
  777. size.size64 = MessageLayout(max32).message_size().size64;
  778. output("#define _UPB_MAXOPT_SIZE $0\n\n", GetSizeInit(size));
  779. }
  780. output(
  781. "#ifdef __cplusplus\n"
  782. "} /* extern \"C\" */\n"
  783. "#endif\n"
  784. "\n"
  785. "#include \"upb/port_undef.inc\"\n"
  786. "\n"
  787. "#endif /* $0_UPB_H_ */\n",
  788. ToPreproc(file->name()));
  789. }
  790. int TableDescriptorType(const protobuf::FieldDescriptor* field) {
  791. if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2 &&
  792. field->type() == protobuf::FieldDescriptor::TYPE_STRING) {
  793. // From the perspective of the binary encoder/decoder, proto2 string fields
  794. // are identical to bytes fields. Only in proto3 do we check UTF-8 for
  795. // string fields at parse time.
  796. //
  797. // If we ever use these tables for JSON encoding/decoding (for example by
  798. // embedding field names on the side) we will have to revisit this, because
  799. // string vs. bytes behavior is not affected by proto2 vs proto3.
  800. return protobuf::FieldDescriptor::TYPE_BYTES;
  801. } else if (field->enum_type() &&
  802. field->enum_type()->file()->syntax() ==
  803. protobuf::FileDescriptor::SYNTAX_PROTO3) {
  804. // From the perspective of the binary decoder, proto3 enums are identical to
  805. // int32 fields. Only in proto2 do we check enum values to make sure they
  806. // are defined in the enum.
  807. return protobuf::FieldDescriptor::TYPE_INT32;
  808. } else {
  809. return field->type();
  810. }
  811. }
  812. struct SubLayoutArray {
  813. public:
  814. SubLayoutArray(const protobuf::Descriptor* message);
  815. const std::vector<const protobuf::Descriptor*>& submsgs() const {
  816. return submsgs_;
  817. }
  818. const std::vector<const protobuf::EnumDescriptor*>& subenums() const {
  819. return subenums_;
  820. }
  821. int total_count() const { return submsgs_.size() + subenums_.size(); }
  822. int GetIndex(const void* sub) {
  823. auto it = indexes_.find(sub);
  824. assert(it != indexes_.end());
  825. return it->second;
  826. }
  827. private:
  828. std::vector<const protobuf::Descriptor*> submsgs_;
  829. std::vector<const protobuf::EnumDescriptor*> subenums_;
  830. absl::flat_hash_map<const void*, int> indexes_;
  831. };
  832. SubLayoutArray::SubLayoutArray(const protobuf::Descriptor* message) {
  833. MessageLayout layout(message);
  834. std::vector<const protobuf::FieldDescriptor*> sorted_submsgs =
  835. SortedSubmessages(message);
  836. int i = 0;
  837. for (const auto* submsg : sorted_submsgs) {
  838. if (!indexes_.try_emplace(submsg->message_type(), i).second) {
  839. // Already present.
  840. continue;
  841. }
  842. submsgs_.push_back(submsg->message_type());
  843. i++;
  844. }
  845. std::vector<const protobuf::FieldDescriptor*> sorted_subenums =
  846. SortedSubEnums(message);
  847. for (const auto* field : sorted_subenums) {
  848. if (field->file()->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
  849. continue;
  850. }
  851. if (!indexes_.try_emplace(field->enum_type(), i).second) {
  852. // Already present.
  853. continue;
  854. }
  855. subenums_.push_back(field->enum_type());
  856. i++;
  857. }
  858. }
  // One fast-decode table slot: (parser function name, 64-bit data word).
  using TableEntry = std::pair<std::string, uint64_t>;
  860. uint64_t GetEncodedTag(const protobuf::FieldDescriptor* field) {
  861. protobuf::internal::WireFormatLite::WireType wire_type =
  862. protobuf::internal::WireFormat::WireTypeForField(field);
  863. uint32_t unencoded_tag =
  864. protobuf::internal::WireFormatLite::MakeTag(field->number(), wire_type);
  865. uint8_t tag_bytes[10] = {0};
  866. protobuf::io::CodedOutputStream::WriteVarint32ToArray(unencoded_tag,
  867. tag_bytes);
  868. uint64_t encoded_tag = 0;
  869. memcpy(&encoded_tag, tag_bytes, sizeof(encoded_tag));
  870. // TODO: byte-swap for big endian.
  871. return encoded_tag;
  872. }
  873. int GetTableSlot(const protobuf::FieldDescriptor* field) {
  874. uint64_t tag = GetEncodedTag(field);
  875. if (tag > 0x7fff) {
  876. // Tag must fit within a two-byte varint.
  877. return -1;
  878. }
  879. return (tag & 0xf8) >> 3;
  880. }
  881. bool TryFillTableEntry(const protobuf::Descriptor* message,
  882. const MessageLayout& layout,
  883. const protobuf::FieldDescriptor* field,
  884. TableEntry& ent) {
  885. std::string type = "";
  886. std::string cardinality = "";
  887. switch (field->type()) {
  888. case protobuf::FieldDescriptor::TYPE_BOOL:
  889. type = "b1";
  890. break;
  891. case protobuf::FieldDescriptor::TYPE_ENUM:
  892. if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO2) {
  893. // We don't have the means to test proto2 enum fields for valid values.
  894. return false;
  895. }
  896. ABSL_FALLTHROUGH_INTENDED;
  897. case protobuf::FieldDescriptor::TYPE_INT32:
  898. case protobuf::FieldDescriptor::TYPE_UINT32:
  899. type = "v4";
  900. break;
  901. case protobuf::FieldDescriptor::TYPE_INT64:
  902. case protobuf::FieldDescriptor::TYPE_UINT64:
  903. type = "v8";
  904. break;
  905. case protobuf::FieldDescriptor::TYPE_FIXED32:
  906. case protobuf::FieldDescriptor::TYPE_SFIXED32:
  907. case protobuf::FieldDescriptor::TYPE_FLOAT:
  908. type = "f4";
  909. break;
  910. case protobuf::FieldDescriptor::TYPE_FIXED64:
  911. case protobuf::FieldDescriptor::TYPE_SFIXED64:
  912. case protobuf::FieldDescriptor::TYPE_DOUBLE:
  913. type = "f8";
  914. break;
  915. case protobuf::FieldDescriptor::TYPE_SINT32:
  916. type = "z4";
  917. break;
  918. case protobuf::FieldDescriptor::TYPE_SINT64:
  919. type = "z8";
  920. break;
  921. case protobuf::FieldDescriptor::TYPE_STRING:
  922. if (field->file()->syntax() == protobuf::FileDescriptor::SYNTAX_PROTO3) {
  923. // Only proto3 validates UTF-8.
  924. type = "s";
  925. break;
  926. }
  927. ABSL_FALLTHROUGH_INTENDED;
  928. case protobuf::FieldDescriptor::TYPE_BYTES:
  929. type = "b";
  930. break;
  931. case protobuf::FieldDescriptor::TYPE_MESSAGE:
  932. if (field->is_map()) {
  933. return false; // Not supported yet (ever?).
  934. }
  935. type = "m";
  936. break;
  937. default:
  938. return false; // Not supported yet.
  939. }
  940. switch (field->label()) {
  941. case protobuf::FieldDescriptor::LABEL_REPEATED:
  942. if (field->is_packed()) {
  943. cardinality = "p";
  944. } else {
  945. cardinality = "r";
  946. }
  947. break;
  948. case protobuf::FieldDescriptor::LABEL_OPTIONAL:
  949. case protobuf::FieldDescriptor::LABEL_REQUIRED:
  950. if (field->real_containing_oneof()) {
  951. cardinality = "o";
  952. } else {
  953. cardinality = "s";
  954. }
  955. break;
  956. }
  957. uint64_t expected_tag = GetEncodedTag(field);
  958. MessageLayout::Size offset = layout.GetFieldOffset(field);
  959. // Data is:
  960. //
  961. // 48 32 16 0
  962. // |--------|--------|--------|--------|--------|--------|--------|--------|
  963. // | offset (16) |case offset (16) |presence| submsg | exp. tag (16) |
  964. // |--------|--------|--------|--------|--------|--------|--------|--------|
  965. //
  966. // - |presence| is either hasbit index or field number for oneofs.
  967. uint64_t data = offset.size64 << 48 | expected_tag;
  968. if (field->is_repeated()) {
  969. // No hasbit/oneof-related fields.
  970. }
  971. if (field->real_containing_oneof()) {
  972. MessageLayout::Size case_offset =
  973. layout.GetOneofCaseOffset(field->real_containing_oneof());
  974. if (case_offset.size64 > 0xffff) return false;
  975. assert(field->number() < 256);
  976. data |= field->number() << 24;
  977. data |= case_offset.size64 << 32;
  978. } else {
  979. uint64_t hasbit_index = 63; // No hasbit (set a high, unused bit).
  980. if (layout.HasHasbit(field)) {
  981. hasbit_index = layout.GetHasbitIndex(field);
  982. if (hasbit_index > 31) return false;
  983. }
  984. data |= hasbit_index << 24;
  985. }
  986. if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
  987. SubLayoutArray sublayout_array(message);
  988. uint64_t idx = sublayout_array.GetIndex(field->message_type());
  989. if (idx > 255) return false;
  990. data |= idx << 16;
  991. std::string size_ceil = "max";
  992. size_t size = SIZE_MAX;
  993. if (field->message_type()->file() == field->file()) {
  994. // We can only be guaranteed the size of the sub-message if it is in the
  995. // same file as us. We could relax this to increase the speed of
  996. // cross-file sub-message parsing if we are comfortable requiring that
  997. // users compile all messages at the same time.
  998. MessageLayout sub_layout(field->message_type());
  999. size = sub_layout.message_size().size64 + 8;
  1000. }
  1001. std::vector<size_t> breaks = {64, 128, 192, 256};
  1002. for (auto brk : breaks) {
  1003. if (size <= brk) {
  1004. size_ceil = std::to_string(brk);
  1005. break;
  1006. }
  1007. }
  1008. ent.first = absl::Substitute("upb_p$0$1_$2bt_max$3b", cardinality, type,
  1009. expected_tag > 0xff ? "2" : "1", size_ceil);
  1010. } else {
  1011. ent.first = absl::Substitute("upb_p$0$1_$2bt", cardinality, type,
  1012. expected_tag > 0xff ? "2" : "1");
  1013. }
  1014. ent.second = data;
  1015. return true;
  1016. }
  1017. std::vector<TableEntry> FastDecodeTable(const protobuf::Descriptor* message,
  1018. const MessageLayout& layout) {
  1019. std::vector<TableEntry> table;
  1020. for (const auto field : FieldHotnessOrder(message)) {
  1021. TableEntry ent;
  1022. int slot = GetTableSlot(field);
  1023. // std::cerr << "table slot: " << field->number() << ": " << slot << "\n";
  1024. if (slot < 0) {
  1025. // Tag can't fit in the table.
  1026. continue;
  1027. }
  1028. if (!TryFillTableEntry(message, layout, field, ent)) {
  1029. // Unsupported field type or offset, hasbit index, etc. doesn't fit.
  1030. continue;
  1031. }
  1032. while ((size_t)slot >= table.size()) {
  1033. size_t size = std::max(static_cast<size_t>(1), table.size() * 2);
  1034. table.resize(size, TableEntry{"fastdecode_generic", 0});
  1035. }
  1036. if (table[slot].first != "fastdecode_generic") {
  1037. // A hotter field already filled this slot.
  1038. continue;
  1039. }
  1040. table[slot] = ent;
  1041. }
  1042. return table;
  1043. }
  1044. void WriteField(const protobuf::FieldDescriptor* field,
  1045. absl::string_view offset, absl::string_view presence,
  1046. int submsg_index, Output& output) {
  1047. std::string mode;
  1048. std::string rep;
  1049. if (field->is_map()) {
  1050. mode = "kUpb_FieldMode_Map";
  1051. rep = "upb_FieldRep_Pointer";
  1052. } else if (field->is_repeated()) {
  1053. mode = "kUpb_FieldMode_Array";
  1054. rep = "upb_FieldRep_Pointer";
  1055. } else {
  1056. mode = "kUpb_FieldMode_Scalar";
  1057. rep = SizeRep(field);
  1058. }
  1059. if (field->is_packed()) {
  1060. absl::StrAppend(&mode, " | upb_LabelFlags_IsPacked");
  1061. }
  1062. if (field->is_extension()) {
  1063. absl::StrAppend(&mode, " | upb_LabelFlags_IsExtension");
  1064. }
  1065. output("{$0, $1, $2, $3, $4, $5 | ($6 << upb_FieldRep_Shift)}",
  1066. field->number(), offset, presence, submsg_index,
  1067. TableDescriptorType(field), mode, rep);
  1068. }
  1069. // Writes a single field into a .upb.c source file.
  1070. void WriteMessageField(const protobuf::FieldDescriptor* field,
  1071. const MessageLayout& layout, int submsg_index,
  1072. Output& output) {
  1073. std::string presence = "0";
  1074. if (MessageLayout::HasHasbit(field)) {
  1075. int index = layout.GetHasbitIndex(field);
  1076. assert(index != 0);
  1077. presence = absl::StrCat(index);
  1078. } else if (field->real_containing_oneof()) {
  1079. MessageLayout::Size case_offset =
  1080. layout.GetOneofCaseOffset(field->real_containing_oneof());
  1081. // We encode as negative to distinguish from hasbits.
  1082. case_offset.size32 = ~case_offset.size32;
  1083. case_offset.size64 = ~case_offset.size64;
  1084. assert(case_offset.size32 < 0);
  1085. assert(case_offset.size64 < 0);
  1086. presence = GetSizeInit(case_offset);
  1087. }
  1088. output(" ");
  1089. WriteField(field, GetSizeInit(layout.GetFieldOffset(field)), presence,
  1090. submsg_index, output);
  1091. output(",\n");
  1092. }
  1093. // Writes a single message into a .upb.c source file.
  1094. void WriteMessage(const protobuf::Descriptor* message, Output& output,
  1095. bool fasttable_enabled) {
  1096. std::string msg_name = ToCIdent(message->full_name());
  1097. std::string fields_array_ref = "NULL";
  1098. std::string submsgs_array_ref = "NULL";
  1099. std::string subenums_array_ref = "NULL";
  1100. uint8_t dense_below = 0;
  1101. const int dense_below_max = std::numeric_limits<decltype(dense_below)>::max();
  1102. MessageLayout layout(message);
  1103. SubLayoutArray sublayout_array(message);
  1104. if (sublayout_array.total_count()) {
  1105. // TODO(haberman): could save a little bit of space by only generating a
  1106. // "submsgs" array for every strongly-connected component.
  1107. std::string submsgs_array_name = msg_name + "_submsgs";
  1108. submsgs_array_ref = "&" + submsgs_array_name + "[0]";
  1109. output("static const upb_MiniTable_Sub $0[$1] = {\n", submsgs_array_name,
  1110. sublayout_array.total_count());
  1111. for (const auto* submsg : sublayout_array.submsgs()) {
  1112. output(" {.submsg = &$0},\n", MessageInit(submsg));
  1113. }
  1114. for (const auto* subenum : sublayout_array.subenums()) {
  1115. output(" {.subenum = &$0},\n", EnumInit(subenum));
  1116. }
  1117. output("};\n\n");
  1118. }
  1119. std::vector<const protobuf::FieldDescriptor*> field_number_order =
  1120. FieldNumberOrder(message);
  1121. if (!field_number_order.empty()) {
  1122. std::string fields_array_name = msg_name + "__fields";
  1123. fields_array_ref = "&" + fields_array_name + "[0]";
  1124. output("static const upb_MiniTable_Field $0[$1] = {\n", fields_array_name,
  1125. field_number_order.size());
  1126. for (int i = 0; i < static_cast<int>(field_number_order.size()); i++) {
  1127. auto field = field_number_order[i];
  1128. int sublayout_index = 0;
  1129. if (i < dense_below_max && field->number() == i + 1 &&
  1130. (i == 0 || field_number_order[i - 1]->number() == i)) {
  1131. dense_below = i + 1;
  1132. }
  1133. if (field->cpp_type() == protobuf::FieldDescriptor::CPPTYPE_MESSAGE) {
  1134. sublayout_index = sublayout_array.GetIndex(field->message_type());
  1135. } else if (field->enum_type() &&
  1136. field->enum_type()->file()->syntax() ==
  1137. protobuf::FileDescriptor::SYNTAX_PROTO2) {
  1138. sublayout_index = sublayout_array.GetIndex(field->enum_type());
  1139. }
  1140. WriteMessageField(field, layout, sublayout_index, output);
  1141. }
  1142. output("};\n\n");
  1143. }
  1144. std::vector<TableEntry> table;
  1145. uint8_t table_mask = -1;
  1146. if (fasttable_enabled) {
  1147. table = FastDecodeTable(message, layout);
  1148. }
  1149. if (table.size() > 1) {
  1150. assert((table.size() & (table.size() - 1)) == 0);
  1151. table_mask = (table.size() - 1) << 3;
  1152. }
  1153. std::string msgext = "upb_ExtMode_NonExtendable";
  1154. if (message->extension_range_count()) {
  1155. if (message->options().message_set_wire_format()) {
  1156. msgext = "upb_ExtMode_IsMessageSet";
  1157. } else {
  1158. msgext = "upb_ExtMode_Extendable";
  1159. }
  1160. }
  1161. output("const upb_MiniTable $0 = {\n", MessageInit(message));
  1162. output(" $0,\n", submsgs_array_ref);
  1163. output(" $0,\n", fields_array_ref);
  1164. output(" $0, $1, $2, $3, $4, $5,\n", GetSizeInit(layout.message_size()),
  1165. field_number_order.size(), msgext, dense_below, table_mask,
  1166. layout.required_count());
  1167. if (!table.empty()) {
  1168. output(" UPB_FASTTABLE_INIT({\n");
  1169. for (const auto& ent : table) {
  1170. output(" {0x$1, &$0},\n", ent.first,
  1171. absl::StrCat(absl::Hex(ent.second, absl::kZeroPad16)));
  1172. }
  1173. output(" }),\n");
  1174. }
  1175. output("};\n\n");
  1176. }
  1177. int WriteEnums(const protobuf::FileDescriptor* file, Output& output) {
  1178. if (file->syntax() != protobuf::FileDescriptor::SYNTAX_PROTO2) {
  1179. return 0;
  1180. }
  1181. std::vector<const protobuf::EnumDescriptor*> this_file_enums =
  1182. SortedEnums(file);
  1183. std::string values_init = "NULL";
  1184. for (const auto* e : this_file_enums) {
  1185. uint64_t mask = 0;
  1186. absl::flat_hash_set<int32_t> values;
  1187. for (int i = 0; i < e->value_count(); i++) {
  1188. int32_t number = e->value(i)->number();
  1189. if (static_cast<uint32_t>(number) < 64) {
  1190. mask |= 1 << number;
  1191. } else {
  1192. values.insert(number);
  1193. }
  1194. }
  1195. std::vector<int32_t> values_vec(values.begin(), values.end());
  1196. std::sort(values_vec.begin(), values_vec.end());
  1197. if (!values_vec.empty()) {
  1198. values_init = EnumInit(e) + "_values";
  1199. output("static const int32_t $0[$1] = {\n", values_init,
  1200. values_vec.size());
  1201. for (auto value : values_vec) {
  1202. output(" $0,\n", value);
  1203. }
  1204. output("};\n\n");
  1205. }
  1206. output("const upb_MiniTable_Enum $0 = {\n", EnumInit(e));
  1207. output(" $0,\n", values_init);
  1208. output(" 0x$0ULL,\n", absl::Hex(mask));
  1209. output(" $0,\n", values_vec.size());
  1210. output("};\n\n");
  1211. }
  1212. if (!this_file_enums.empty()) {
  1213. output("static const upb_MiniTable_Enum *$0[$1] = {\n", kEnumsInit,
  1214. this_file_enums.size());
  1215. for (const auto* e : this_file_enums) {
  1216. output(" &$0,\n", EnumInit(e));
  1217. }
  1218. output("};\n");
  1219. output("\n");
  1220. }
  1221. return this_file_enums.size();
  1222. }
  1223. int WriteMessages(const protobuf::FileDescriptor* file, Output& output,
  1224. bool fasttable_enabled) {
  1225. std::vector<const protobuf::Descriptor*> file_messages = SortedMessages(file);
  1226. if (file_messages.empty()) return 0;
  1227. for (auto message : file_messages) {
  1228. WriteMessage(message, output, fasttable_enabled);
  1229. }
  1230. output("static const upb_MiniTable *$0[$1] = {\n", kMessagesInit,
  1231. file_messages.size());
  1232. for (auto message : file_messages) {
  1233. output(" &$0,\n", MessageInit(message));
  1234. }
  1235. output("};\n");
  1236. output("\n");
  1237. return file_messages.size();
  1238. }
  1239. void WriteExtension(const protobuf::FieldDescriptor* ext, Output& output) {
  1240. output("const upb_MiniTable_Extension $0 = {\n ", ExtensionLayout(ext));
  1241. WriteField(ext, "0", "0", 0, output);
  1242. output(",\n");
  1243. output(" &$0,\n", MessageInit(ext->containing_type()));
  1244. if (ext->message_type()) {
  1245. output(" {.submsg = &$0},\n", MessageInit(ext->message_type()));
  1246. } else if (ext->enum_type() && ext->enum_type()->file()->syntax() ==
  1247. protobuf::FileDescriptor::SYNTAX_PROTO2) {
  1248. output(" {.subenum = &$0},\n", EnumInit(ext->enum_type()));
  1249. } else {
  1250. output(" {.submsg = NULL},\n");
  1251. }
  1252. output("\n};\n");
  1253. }
  1254. int WriteExtensions(const protobuf::FileDescriptor* file, Output& output) {
  1255. auto exts = SortedExtensions(file);
  1256. absl::flat_hash_set<const protobuf::Descriptor*> forward_decls;
  1257. if (exts.empty()) return 0;
  1258. // Order by full name for consistent ordering.
  1259. std::map<std::string, const protobuf::Descriptor*> forward_messages;
  1260. for (auto ext : exts) {
  1261. forward_messages[ext->containing_type()->full_name()] =
  1262. ext->containing_type();
  1263. if (ext->message_type()) {
  1264. forward_messages[ext->message_type()->full_name()] = ext->message_type();
  1265. }
  1266. }
  1267. for (const auto& decl : forward_messages) {
  1268. output("extern const upb_MiniTable $0;\n", MessageInit(decl.second));
  1269. }
  1270. for (auto ext : exts) {
  1271. WriteExtension(ext, output);
  1272. }
  1273. output(
  1274. "\n"
  1275. "static const upb_MiniTable_Extension *$0[$1] = {\n",
  1276. kExtensionsInit, exts.size());
  1277. for (auto ext : exts) {
  1278. output(" &$0,\n", ExtensionLayout(ext));
  1279. }
  1280. output(
  1281. "};\n"
  1282. "\n");
  1283. return exts.size();
  1284. }
  1285. // Writes a .upb.c source file.
  1286. void WriteSource(const protobuf::FileDescriptor* file, Output& output,
  1287. bool fasttable_enabled) {
  1288. EmitFileWarning(file, output);
  1289. output(
  1290. "#include <stddef.h>\n"
  1291. "#include \"upb/msg_internal.h\"\n"
  1292. "#include \"$0\"\n",
  1293. HeaderFilename(file));
  1294. for (int i = 0; i < file->dependency_count(); i++) {
  1295. output("#include \"$0\"\n", HeaderFilename(file->dependency(i)));
  1296. }
  1297. output(
  1298. "\n"
  1299. "#include \"upb/port_def.inc\"\n"
  1300. "\n");
  1301. int msg_count = WriteMessages(file, output, fasttable_enabled);
  1302. int ext_count = WriteExtensions(file, output);
  1303. int enum_count = WriteEnums(file, output);
  1304. output("const upb_MiniTable_File $0 = {\n", FileLayoutName(file));
  1305. output(" $0,\n", msg_count ? kMessagesInit : "NULL");
  1306. output(" $0,\n", enum_count ? kEnumsInit : "NULL");
  1307. output(" $0,\n", ext_count ? kExtensionsInit : "NULL");
  1308. output(" $0,\n", msg_count);
  1309. output(" $0,\n", enum_count);
  1310. output(" $0,\n", ext_count);
  1311. output("};\n\n");
  1312. output("#include \"upb/port_undef.inc\"\n");
  1313. output("\n");
  1314. }
  1315. class Generator : public protoc::CodeGenerator {
  1316. ~Generator() override {}
  1317. bool Generate(const protobuf::FileDescriptor* file,
  1318. const std::string& parameter, protoc::GeneratorContext* context,
  1319. std::string* error) const override;
  1320. uint64_t GetSupportedFeatures() const override {
  1321. return FEATURE_PROTO3_OPTIONAL;
  1322. }
  1323. };
  1324. bool Generator::Generate(const protobuf::FileDescriptor* file,
  1325. const std::string& parameter,
  1326. protoc::GeneratorContext* context,
  1327. std::string* error) const {
  1328. bool fasttable_enabled = false;
  1329. std::vector<std::pair<std::string, std::string>> params;
  1330. google::protobuf::compiler::ParseGeneratorParameter(parameter, &params);
  1331. for (const auto& pair : params) {
  1332. if (pair.first == "fasttable") {
  1333. fasttable_enabled = true;
  1334. } else {
  1335. *error = "Unknown parameter: " + pair.first;
  1336. return false;
  1337. }
  1338. }
  1339. Output h_output(context->Open(HeaderFilename(file)));
  1340. WriteHeader(file, h_output);
  1341. Output c_output(context->Open(SourceFilename(file)));
  1342. WriteSource(file, c_output, fasttable_enabled);
  1343. return true;
  1344. }
  1345. } // namespace
  1346. } // namespace upbc
  1347. int main(int argc, char** argv) {
  1348. std::unique_ptr<google::protobuf::compiler::CodeGenerator> generator(
  1349. new upbc::Generator());
  1350. return google::protobuf::compiler::PluginMain(argc, argv, generator.get());
  1351. }