convert.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // https://developers.google.com/protocol-buffers/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // -----------------------------------------------------------------------------
  31. // Ruby <-> upb data conversion functions.
  32. //
  33. // This file also contains a few other assorted algorithms on upb_msgval.
  34. //
  35. // None of the algorithms in this file require any access to the internal
  36. // representation of Ruby or upb objects.
  37. // -----------------------------------------------------------------------------
  38. #include "convert.h"
  39. #include "message.h"
  40. #include "protobuf.h"
  41. static upb_strview Convert_StringData(VALUE str, upb_arena *arena) {
  42. upb_strview ret;
  43. if (arena) {
  44. char *ptr = upb_arena_malloc(arena, RSTRING_LEN(str));
  45. memcpy(ptr, RSTRING_PTR(str), RSTRING_LEN(str));
  46. ret.data = ptr;
  47. } else {
  48. // Data is only needed temporarily (within map lookup).
  49. ret.data = RSTRING_PTR(str);
  50. }
  51. ret.size = RSTRING_LEN(str);
  52. return ret;
  53. }
  54. static bool is_ruby_num(VALUE value) {
  55. return (TYPE(value) == T_FLOAT ||
  56. TYPE(value) == T_FIXNUM ||
  57. TYPE(value) == T_BIGNUM);
  58. }
  59. static void Convert_CheckInt(const char* name, upb_fieldtype_t type,
  60. VALUE val) {
  61. if (!is_ruby_num(val)) {
  62. rb_raise(cTypeError,
  63. "Expected number type for integral field '%s' (given %s).", name,
  64. rb_class2name(CLASS_OF(val)));
  65. }
  66. // NUM2{INT,UINT,LL,ULL} macros do the appropriate range checks on upper
  67. // bound; we just need to do precision checks (i.e., disallow rounding) and
  68. // check for < 0 on unsigned types.
  69. if (TYPE(val) == T_FLOAT) {
  70. double dbl_val = NUM2DBL(val);
  71. if (floor(dbl_val) != dbl_val) {
  72. rb_raise(rb_eRangeError,
  73. "Non-integral floating point value assigned to integer field "
  74. "'%s' (given %s).",
  75. name, rb_class2name(CLASS_OF(val)));
  76. }
  77. }
  78. if (type == UPB_TYPE_UINT32 || type == UPB_TYPE_UINT64) {
  79. if (NUM2DBL(val) < 0) {
  80. rb_raise(
  81. rb_eRangeError,
  82. "Assigning negative value to unsigned integer field '%s' (given %s).",
  83. name, rb_class2name(CLASS_OF(val)));
  84. }
  85. }
  86. }
  87. static int32_t Convert_ToEnum(VALUE value, const char* name,
  88. const upb_enumdef* e) {
  89. int32_t val;
  90. switch (TYPE(value)) {
  91. case T_FLOAT:
  92. case T_FIXNUM:
  93. case T_BIGNUM:
  94. Convert_CheckInt(name, UPB_TYPE_INT32, value);
  95. val = NUM2INT(value);
  96. break;
  97. case T_STRING:
  98. if (!upb_enumdef_ntoi(e, RSTRING_PTR(value), RSTRING_LEN(value), &val)) {
  99. goto unknownval;
  100. }
  101. break;
  102. case T_SYMBOL:
  103. if (!upb_enumdef_ntoiz(e, rb_id2name(SYM2ID(value)), &val)) {
  104. goto unknownval;
  105. }
  106. break;
  107. default:
  108. rb_raise(cTypeError,
  109. "Expected number or symbol type for enum field '%s'.", name);
  110. }
  111. return val;
  112. unknownval:
  113. rb_raise(rb_eRangeError, "Unknown symbol value for enum field '%s'.", name);
  114. }
  115. upb_msgval Convert_RubyToUpb(VALUE value, const char* name, TypeInfo type_info,
  116. upb_arena* arena) {
  117. upb_msgval ret;
  118. switch (type_info.type) {
  119. case UPB_TYPE_FLOAT:
  120. if (!is_ruby_num(value)) {
  121. rb_raise(cTypeError, "Expected number type for float field '%s' (given %s).",
  122. name, rb_class2name(CLASS_OF(value)));
  123. }
  124. ret.float_val = NUM2DBL(value);
  125. break;
  126. case UPB_TYPE_DOUBLE:
  127. if (!is_ruby_num(value)) {
  128. rb_raise(cTypeError, "Expected number type for double field '%s' (given %s).",
  129. name, rb_class2name(CLASS_OF(value)));
  130. }
  131. ret.double_val = NUM2DBL(value);
  132. break;
  133. case UPB_TYPE_BOOL: {
  134. if (value == Qtrue) {
  135. ret.bool_val = 1;
  136. } else if (value == Qfalse) {
  137. ret.bool_val = 0;
  138. } else {
  139. rb_raise(cTypeError, "Invalid argument for boolean field '%s' (given %s).",
  140. name, rb_class2name(CLASS_OF(value)));
  141. }
  142. break;
  143. }
  144. case UPB_TYPE_STRING: {
  145. VALUE utf8 = rb_enc_from_encoding(rb_utf8_encoding());
  146. if (CLASS_OF(value) == rb_cSymbol) {
  147. value = rb_funcall(value, rb_intern("to_s"), 0);
  148. } else if (CLASS_OF(value) != rb_cString) {
  149. rb_raise(cTypeError, "Invalid argument for string field '%s' (given %s).",
  150. name, rb_class2name(CLASS_OF(value)));
  151. }
  152. if (rb_obj_encoding(value) != utf8) {
  153. // Note: this will not duplicate underlying string data unless necessary.
  154. value = rb_str_encode(value, utf8, 0, Qnil);
  155. if (rb_enc_str_coderange(value) == ENC_CODERANGE_BROKEN) {
  156. rb_raise(rb_eEncodingError, "String is invalid UTF-8");
  157. }
  158. }
  159. ret.str_val = Convert_StringData(value, arena);
  160. break;
  161. }
  162. case UPB_TYPE_BYTES: {
  163. VALUE bytes = rb_enc_from_encoding(rb_ascii8bit_encoding());
  164. if (CLASS_OF(value) != rb_cString) {
  165. rb_raise(cTypeError, "Invalid argument for bytes field '%s' (given %s).",
  166. name, rb_class2name(CLASS_OF(value)));
  167. }
  168. if (rb_obj_encoding(value) != bytes) {
  169. // Note: this will not duplicate underlying string data unless necessary.
  170. // TODO(haberman): is this really necessary to get raw bytes?
  171. value = rb_str_encode(value, bytes, 0, Qnil);
  172. }
  173. ret.str_val = Convert_StringData(value, arena);
  174. break;
  175. }
  176. case UPB_TYPE_MESSAGE:
  177. ret.msg_val =
  178. Message_GetUpbMessage(value, type_info.def.msgdef, name, arena);
  179. break;
  180. case UPB_TYPE_ENUM:
  181. ret.int32_val = Convert_ToEnum(value, name, type_info.def.enumdef);
  182. break;
  183. case UPB_TYPE_INT32:
  184. case UPB_TYPE_INT64:
  185. case UPB_TYPE_UINT32:
  186. case UPB_TYPE_UINT64:
  187. Convert_CheckInt(name, type_info.type, value);
  188. switch (type_info.type) {
  189. case UPB_TYPE_INT32:
  190. ret.int32_val = NUM2INT(value);
  191. break;
  192. case UPB_TYPE_INT64:
  193. ret.int64_val = NUM2LL(value);
  194. break;
  195. case UPB_TYPE_UINT32:
  196. ret.uint32_val = NUM2UINT(value);
  197. break;
  198. case UPB_TYPE_UINT64:
  199. ret.uint64_val = NUM2ULL(value);
  200. break;
  201. default:
  202. break;
  203. }
  204. break;
  205. default:
  206. break;
  207. }
  208. return ret;
  209. }
  210. VALUE Convert_UpbToRuby(upb_msgval upb_val, TypeInfo type_info, VALUE arena) {
  211. switch (type_info.type) {
  212. case UPB_TYPE_FLOAT:
  213. return DBL2NUM(upb_val.float_val);
  214. case UPB_TYPE_DOUBLE:
  215. return DBL2NUM(upb_val.double_val);
  216. case UPB_TYPE_BOOL:
  217. return upb_val.bool_val ? Qtrue : Qfalse;
  218. case UPB_TYPE_INT32:
  219. return INT2NUM(upb_val.int32_val);
  220. case UPB_TYPE_INT64:
  221. return LL2NUM(upb_val.int64_val);
  222. case UPB_TYPE_UINT32:
  223. return UINT2NUM(upb_val.uint32_val);
  224. case UPB_TYPE_UINT64:
  225. return ULL2NUM(upb_val.int64_val);
  226. case UPB_TYPE_ENUM: {
  227. const char* name =
  228. upb_enumdef_iton(type_info.def.enumdef, upb_val.int32_val);
  229. if (name) {
  230. return ID2SYM(rb_intern(name));
  231. } else {
  232. return INT2NUM(upb_val.int32_val);
  233. }
  234. }
  235. case UPB_TYPE_STRING: {
  236. VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
  237. rb_enc_associate(str_rb, rb_utf8_encoding());
  238. rb_obj_freeze(str_rb);
  239. return str_rb;
  240. }
  241. case UPB_TYPE_BYTES: {
  242. VALUE str_rb = rb_str_new(upb_val.str_val.data, upb_val.str_val.size);
  243. rb_enc_associate(str_rb, rb_ascii8bit_encoding());
  244. rb_obj_freeze(str_rb);
  245. return str_rb;
  246. }
  247. case UPB_TYPE_MESSAGE:
  248. return Message_GetRubyWrapper((upb_msg*)upb_val.msg_val,
  249. type_info.def.msgdef, arena);
  250. default:
  251. rb_raise(rb_eRuntimeError, "Convert_UpbToRuby(): Unexpected type %d",
  252. (int)type_info.type);
  253. }
  254. }
  255. upb_msgval Msgval_DeepCopy(upb_msgval msgval, TypeInfo type_info,
  256. upb_arena* arena) {
  257. upb_msgval new_msgval;
  258. switch (type_info.type) {
  259. default:
  260. memcpy(&new_msgval, &msgval, sizeof(msgval));
  261. break;
  262. case UPB_TYPE_STRING:
  263. case UPB_TYPE_BYTES: {
  264. size_t n = msgval.str_val.size;
  265. char *mem = upb_arena_malloc(arena, n);
  266. new_msgval.str_val.data = mem;
  267. new_msgval.str_val.size = n;
  268. memcpy(mem, msgval.str_val.data, n);
  269. break;
  270. }
  271. case UPB_TYPE_MESSAGE:
  272. new_msgval.msg_val =
  273. Message_deep_copy(msgval.msg_val, type_info.def.msgdef, arena);
  274. break;
  275. }
  276. return new_msgval;
  277. }
  278. bool Msgval_IsEqual(upb_msgval val1, upb_msgval val2, TypeInfo type_info) {
  279. switch (type_info.type) {
  280. case UPB_TYPE_BOOL:
  281. return memcmp(&val1, &val2, 1) == 0;
  282. case UPB_TYPE_FLOAT:
  283. case UPB_TYPE_INT32:
  284. case UPB_TYPE_UINT32:
  285. case UPB_TYPE_ENUM:
  286. return memcmp(&val1, &val2, 4) == 0;
  287. case UPB_TYPE_DOUBLE:
  288. case UPB_TYPE_INT64:
  289. case UPB_TYPE_UINT64:
  290. return memcmp(&val1, &val2, 8) == 0;
  291. case UPB_TYPE_STRING:
  292. case UPB_TYPE_BYTES:
  293. return val1.str_val.size == val2.str_val.size &&
  294. memcmp(val1.str_val.data, val2.str_val.data,
  295. val1.str_val.size) == 0;
  296. case UPB_TYPE_MESSAGE:
  297. return Message_Equal(val1.msg_val, val2.msg_val, type_info.def.msgdef);
  298. default:
  299. rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  300. }
  301. }
  302. uint64_t Msgval_GetHash(upb_msgval val, TypeInfo type_info, uint64_t seed) {
  303. switch (type_info.type) {
  304. case UPB_TYPE_BOOL:
  305. return Wyhash(&val, 1, seed, kWyhashSalt);
  306. case UPB_TYPE_FLOAT:
  307. case UPB_TYPE_INT32:
  308. case UPB_TYPE_UINT32:
  309. case UPB_TYPE_ENUM:
  310. return Wyhash(&val, 4, seed, kWyhashSalt);
  311. case UPB_TYPE_DOUBLE:
  312. case UPB_TYPE_INT64:
  313. case UPB_TYPE_UINT64:
  314. return Wyhash(&val, 8, seed, kWyhashSalt);
  315. case UPB_TYPE_STRING:
  316. case UPB_TYPE_BYTES:
  317. return Wyhash(val.str_val.data, val.str_val.size, seed, kWyhashSalt);
  318. case UPB_TYPE_MESSAGE:
  319. return Message_Hash(val.msg_val, type_info.def.msgdef, seed);
  320. default:
  321. rb_raise(rb_eRuntimeError, "Internal error, unexpected type");
  322. }
  323. }