convert.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /*
  2. * Copyright (c) 2009-2021, Google LLC
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. * * Redistributions of source code must retain the above copyright
  8. * notice, this list of conditions and the following disclaimer.
  9. * * Redistributions in binary form must reproduce the above copyright
  10. * notice, this list of conditions and the following disclaimer in the
  11. * documentation and/or other materials provided with the distribution.
  12. * * Neither the name of Google LLC nor the
  13. * names of its contributors may be used to endorse or promote products
  14. * derived from this software without specific prior written permission.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
  20. * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  21. * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  22. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  23. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  25. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include "python/convert.h"
  28. #include "python/message.h"
  29. #include "python/protobuf.h"
  30. #include "upb/reflection.h"
  31. #include "upb/util/compare.h"
  32. PyObject* PyUpb_UpbToPy(upb_MessageValue val, const upb_FieldDef* f,
  33. PyObject* arena) {
  34. switch (upb_FieldDef_CType(f)) {
  35. case kUpb_CType_Enum:
  36. case kUpb_CType_Int32:
  37. return PyLong_FromLong(val.int32_val);
  38. case kUpb_CType_Int64:
  39. return PyLong_FromLongLong(val.int64_val);
  40. case kUpb_CType_UInt32:
  41. return PyLong_FromSize_t(val.uint32_val);
  42. case kUpb_CType_UInt64:
  43. return PyLong_FromUnsignedLongLong(val.uint64_val);
  44. case kUpb_CType_Float:
  45. return PyFloat_FromDouble(val.float_val);
  46. case kUpb_CType_Double:
  47. return PyFloat_FromDouble(val.double_val);
  48. case kUpb_CType_Bool:
  49. return PyBool_FromLong(val.bool_val);
  50. case kUpb_CType_Bytes:
  51. return PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
  52. case kUpb_CType_String: {
  53. PyObject* ret =
  54. PyUnicode_DecodeUTF8(val.str_val.data, val.str_val.size, NULL);
  55. // If the string can't be decoded in UTF-8, just return a bytes object
  56. // that contains the raw bytes. This can't happen if the value was
  57. // assigned using the members of the Python message object, but can happen
  58. // if the values were parsed from the wire (binary).
  59. if (ret == NULL) {
  60. PyErr_Clear();
  61. ret = PyBytes_FromStringAndSize(val.str_val.data, val.str_val.size);
  62. }
  63. return ret;
  64. }
  65. case kUpb_CType_Message:
  66. return PyUpb_CMessage_Get((upb_Message*)val.msg_val,
  67. upb_FieldDef_MessageSubDef(f), arena);
  68. default:
  69. PyErr_Format(PyExc_SystemError,
  70. "Getting a value from a field of unknown type %d",
  71. upb_FieldDef_CType(f));
  72. return NULL;
  73. }
  74. }
  75. static bool PyUpb_GetInt64(PyObject* obj, int64_t* val) {
  76. // We require that the value is either an integer or has an __index__
  77. // conversion.
  78. obj = PyNumber_Index(obj);
  79. if (!obj) return false;
  80. // If the value is already a Python long, PyLong_AsLongLong() retrieves it.
  81. // Otherwise is converts to integer using __int__.
  82. *val = PyLong_AsLongLong(obj);
  83. bool ok = true;
  84. if (PyErr_Occurred()) {
  85. assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  86. PyErr_Clear();
  87. PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  88. ok = false;
  89. }
  90. Py_DECREF(obj);
  91. return ok;
  92. }
  93. static bool PyUpb_GetUint64(PyObject* obj, uint64_t* val) {
  94. // We require that the value is either an integer or has an __index__
  95. // conversion.
  96. obj = PyNumber_Index(obj);
  97. if (!obj) return false;
  98. *val = PyLong_AsUnsignedLongLong(obj);
  99. bool ok = true;
  100. if (PyErr_Occurred()) {
  101. assert(PyErr_ExceptionMatches(PyExc_OverflowError));
  102. PyErr_Clear();
  103. PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  104. ok = false;
  105. }
  106. Py_DECREF(obj);
  107. return ok;
  108. }
  109. static bool PyUpb_GetInt32(PyObject* obj, int32_t* val) {
  110. int64_t i64;
  111. if (!PyUpb_GetInt64(obj, &i64)) return false;
  112. if (i64 < INT32_MIN || i64 > INT32_MAX) {
  113. PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  114. return false;
  115. }
  116. *val = i64;
  117. return true;
  118. }
  119. static bool PyUpb_GetUint32(PyObject* obj, uint32_t* val) {
  120. uint64_t u64;
  121. if (!PyUpb_GetUint64(obj, &u64)) return false;
  122. if (u64 > UINT32_MAX) {
  123. PyErr_Format(PyExc_ValueError, "Value out of range: %S", obj);
  124. return false;
  125. }
  126. *val = u64;
  127. return true;
  128. }
  129. // If `arena` is specified, copies the string data into the given arena.
  130. // Otherwise aliases the given data.
  131. static upb_MessageValue PyUpb_MaybeCopyString(const char* ptr, size_t size,
  132. upb_Arena* arena) {
  133. upb_MessageValue ret;
  134. ret.str_val.size = size;
  135. if (arena) {
  136. char* buf = upb_Arena_Malloc(arena, size);
  137. memcpy(buf, ptr, size);
  138. ret.str_val.data = buf;
  139. } else {
  140. ret.str_val.data = ptr;
  141. }
  142. return ret;
  143. }
  144. static bool PyUpb_PyToUpbEnum(PyObject* obj, const upb_EnumDef* e,
  145. upb_MessageValue* val) {
  146. if (PyUnicode_Check(obj)) {
  147. Py_ssize_t size;
  148. const char* name = PyUnicode_AsUTF8AndSize(obj, &size);
  149. const upb_EnumValueDef* ev =
  150. upb_EnumDef_FindValueByNameWithSize(e, name, size);
  151. if (!ev) {
  152. PyErr_Format(PyExc_ValueError, "unknown enum label \"%s\"", name);
  153. return false;
  154. }
  155. val->int32_val = upb_EnumValueDef_Number(ev);
  156. return true;
  157. } else {
  158. int32_t i32;
  159. if (!PyUpb_GetInt32(obj, &i32)) return false;
  160. if (upb_FileDef_Syntax(upb_EnumDef_File(e)) == kUpb_Syntax_Proto2 &&
  161. !upb_EnumDef_CheckNumber(e, i32)) {
  162. PyErr_Format(PyExc_ValueError, "invalid enumerator %d", (int)i32);
  163. return false;
  164. }
  165. val->int32_val = i32;
  166. return true;
  167. }
  168. }
  169. bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
  170. upb_Arena* arena) {
  171. switch (upb_FieldDef_CType(f)) {
  172. case kUpb_CType_Enum:
  173. return PyUpb_PyToUpbEnum(obj, upb_FieldDef_EnumSubDef(f), val);
  174. case kUpb_CType_Int32:
  175. return PyUpb_GetInt32(obj, &val->int32_val);
  176. case kUpb_CType_Int64:
  177. return PyUpb_GetInt64(obj, &val->int64_val);
  178. case kUpb_CType_UInt32:
  179. return PyUpb_GetUint32(obj, &val->uint32_val);
  180. case kUpb_CType_UInt64:
  181. return PyUpb_GetUint64(obj, &val->uint64_val);
  182. case kUpb_CType_Float:
  183. val->float_val = PyFloat_AsDouble(obj);
  184. return !PyErr_Occurred();
  185. case kUpb_CType_Double:
  186. val->double_val = PyFloat_AsDouble(obj);
  187. return !PyErr_Occurred();
  188. case kUpb_CType_Bool:
  189. val->bool_val = PyLong_AsLong(obj);
  190. return !PyErr_Occurred();
  191. case kUpb_CType_Bytes: {
  192. char* ptr;
  193. Py_ssize_t size;
  194. if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
  195. *val = PyUpb_MaybeCopyString(ptr, size, arena);
  196. return true;
  197. }
  198. case kUpb_CType_String: {
  199. Py_ssize_t size;
  200. const char* ptr;
  201. PyObject* unicode = NULL;
  202. if (PyBytes_Check(obj)) {
  203. unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
  204. if (!obj) return false;
  205. }
  206. ptr = PyUnicode_AsUTF8AndSize(obj, &size);
  207. if (PyErr_Occurred()) {
  208. Py_XDECREF(unicode);
  209. return false;
  210. }
  211. *val = PyUpb_MaybeCopyString(ptr, size, arena);
  212. Py_XDECREF(unicode);
  213. return true;
  214. }
  215. case kUpb_CType_Message:
  216. PyErr_Format(PyExc_ValueError, "Message objects may not be assigned",
  217. upb_FieldDef_CType(f));
  218. return false;
  219. default:
  220. PyErr_Format(PyExc_SystemError,
  221. "Getting a value from a field of unknown type %d",
  222. upb_FieldDef_CType(f));
  223. return false;
  224. }
  225. }
  226. bool PyUpb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
  227. const upb_MessageDef* m);
  228. // -----------------------------------------------------------------------------
  229. // Equal
  230. // -----------------------------------------------------------------------------
  231. bool PyUpb_ValueEq(upb_MessageValue val1, upb_MessageValue val2,
  232. const upb_FieldDef* f) {
  233. switch (upb_FieldDef_CType(f)) {
  234. case kUpb_CType_Bool:
  235. return val1.bool_val == val2.bool_val;
  236. case kUpb_CType_Int32:
  237. case kUpb_CType_UInt32:
  238. case kUpb_CType_Enum:
  239. return val1.int32_val == val2.int32_val;
  240. case kUpb_CType_Int64:
  241. case kUpb_CType_UInt64:
  242. return val1.int64_val == val2.int64_val;
  243. case kUpb_CType_Float:
  244. return val1.float_val == val2.float_val;
  245. case kUpb_CType_Double:
  246. return val1.double_val == val2.double_val;
  247. case kUpb_CType_String:
  248. case kUpb_CType_Bytes:
  249. return val1.str_val.size == val2.str_val.size &&
  250. memcmp(val1.str_val.data, val2.str_val.data, val1.str_val.size) ==
  251. 0;
  252. case kUpb_CType_Message:
  253. return PyUpb_Message_IsEqual(val1.msg_val, val2.msg_val,
  254. upb_FieldDef_MessageSubDef(f));
  255. default:
  256. return false;
  257. }
  258. }
  259. bool PyUpb_Map_IsEqual(const upb_Map* map1, const upb_Map* map2,
  260. const upb_FieldDef* f) {
  261. assert(upb_FieldDef_IsMap(f));
  262. if (map1 == map2) return true;
  263. size_t size1 = map1 ? upb_Map_Size(map1) : 0;
  264. size_t size2 = map2 ? upb_Map_Size(map2) : 0;
  265. if (size1 != size2) return false;
  266. if (size1 == 0) return true;
  267. const upb_MessageDef* entry_m = upb_FieldDef_MessageSubDef(f);
  268. const upb_FieldDef* val_f = upb_MessageDef_Field(entry_m, 1);
  269. size_t iter = kUpb_Map_Begin;
  270. while (upb_MapIterator_Next(map1, &iter)) {
  271. upb_MessageValue key = upb_MapIterator_Key(map1, iter);
  272. upb_MessageValue val1 = upb_MapIterator_Value(map1, iter);
  273. upb_MessageValue val2;
  274. if (!upb_Map_Get(map2, key, &val2)) return false;
  275. if (!PyUpb_ValueEq(val1, val2, val_f)) return false;
  276. }
  277. return true;
  278. }
  279. static bool PyUpb_ArrayElem_IsEqual(const upb_Array* arr1,
  280. const upb_Array* arr2, size_t i,
  281. const upb_FieldDef* f) {
  282. assert(i < upb_Array_Size(arr1));
  283. assert(i < upb_Array_Size(arr2));
  284. upb_MessageValue val1 = upb_Array_Get(arr1, i);
  285. upb_MessageValue val2 = upb_Array_Get(arr2, i);
  286. return PyUpb_ValueEq(val1, val2, f);
  287. }
  288. bool PyUpb_Array_IsEqual(const upb_Array* arr1, const upb_Array* arr2,
  289. const upb_FieldDef* f) {
  290. assert(upb_FieldDef_IsRepeated(f) && !upb_FieldDef_IsMap(f));
  291. if (arr1 == arr2) return true;
  292. size_t n1 = arr1 ? upb_Array_Size(arr1) : 0;
  293. size_t n2 = arr2 ? upb_Array_Size(arr2) : 0;
  294. if (n1 != n2) return false;
  295. // Half the length rounded down. Important: the empty list rounds to 0.
  296. size_t half = n1 / 2;
  297. // Search from the ends-in. We expect differences to more quickly manifest
  298. // at the ends than in the middle. If the length is odd we will miss the
  299. // middle element.
  300. for (size_t i = 0; i < half; i++) {
  301. if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, i, f)) return false;
  302. if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, n1 - 1 - i, f)) return false;
  303. }
  304. // For an odd-lengthed list, pick up the middle element.
  305. if (n1 & 1) {
  306. if (!PyUpb_ArrayElem_IsEqual(arr1, arr2, half, f)) return false;
  307. }
  308. return true;
  309. }
  310. bool PyUpb_Message_IsEqual(const upb_Message* msg1, const upb_Message* msg2,
  311. const upb_MessageDef* m) {
  312. if (msg1 == msg2) return true;
  313. if (upb_Message_ExtensionCount(msg1) != upb_Message_ExtensionCount(msg2))
  314. return false;
  315. // Compare messages field-by-field. This is slightly tricky, because while
  316. // we can iterate over normal fields in a predictable order, the extension
  317. // order is unpredictable and may be different between msg1 and msg2.
  318. // So we use the following strategy:
  319. // 1. Iterate over all msg1 fields (including extensions).
  320. // 2. For non-extension fields, we find the corresponding field by simply
  321. // using upb_Message_Next(msg2). If the two messages have the same set
  322. // of fields, this will yield the same field.
  323. // 3. For extension fields, we have to actually search for the corresponding
  324. // field, which we do with upb_Message_Get(msg2, ext_f1).
  325. // 4. Once iteration over msg1 is complete, we call upb_Message_Next(msg2)
  326. // one
  327. // final time to verify that we have visited all of msg2's regular fields
  328. // (we pass NULL for ext_dict so that iteration will *not* return
  329. // extensions).
  330. //
  331. // We don't need to visit all of msg2's extensions, because we verified up
  332. // front that both messages have the same number of extensions.
  333. const upb_DefPool* symtab = upb_FileDef_Pool(upb_MessageDef_File(m));
  334. const upb_FieldDef *f1, *f2;
  335. upb_MessageValue val1, val2;
  336. size_t iter1 = kUpb_Message_Begin;
  337. size_t iter2 = kUpb_Message_Begin;
  338. while (upb_Message_Next(msg1, m, symtab, &f1, &val1, &iter1)) {
  339. if (upb_FieldDef_IsExtension(f1)) {
  340. val2 = upb_Message_Get(msg2, f1);
  341. } else {
  342. if (!upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2) || f1 != f2) {
  343. return false;
  344. }
  345. }
  346. if (upb_FieldDef_IsMap(f1)) {
  347. if (!PyUpb_Map_IsEqual(val1.map_val, val2.map_val, f1)) return false;
  348. } else if (upb_FieldDef_IsRepeated(f1)) {
  349. if (!PyUpb_Array_IsEqual(val1.array_val, val2.array_val, f1)) {
  350. return false;
  351. }
  352. } else {
  353. if (!PyUpb_ValueEq(val1, val2, f1)) return false;
  354. }
  355. }
  356. if (upb_Message_Next(msg2, m, NULL, &f2, &val2, &iter2)) return false;
  357. size_t usize1, usize2;
  358. const char* uf1 = upb_Message_GetUnknown(msg1, &usize1);
  359. const char* uf2 = upb_Message_GetUnknown(msg2, &usize2);
  360. // 100 is arbitrary, we're trying to prevent stack overflow but it's not
  361. // obvious how deep we should allow here.
  362. return upb_Message_UnknownFieldsAreEqual(uf1, usize1, uf2, usize2, 100) ==
  363. kUpb_UnknownCompareResult_Equal;
  364. }