# result_parser.py
  1. # This import depends on the automake rule protoc_middleman, please make sure
  2. # protoc_middleman has been built before run this file.
  3. import argparse
  4. import json
  5. import re
  6. import os.path
  7. # BEGIN OPENSOURCE
  8. import sys
  9. sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
  10. # END OPENSOURCE
  11. import tmp.benchmarks_pb2 as benchmarks_pb2
  12. __file_size_map = {}
  13. def __get_data_size(filename):
  14. if filename[0] != '/':
  15. filename = os.path.dirname(os.path.abspath(__file__)) + "/../" + filename
  16. if filename in __file_size_map:
  17. return __file_size_map[filename]
  18. benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
  19. benchmark_dataset.ParseFromString(
  20. open(filename, "rb").read())
  21. size = 0
  22. count = 0
  23. for payload in benchmark_dataset.payload:
  24. size += len(payload)
  25. count += 1
  26. __file_size_map[filename] = (size, 1.0 * size / count)
  27. return size, 1.0 * size / count
  28. def __extract_file_name(file_name):
  29. name_list = re.split(r"[/\.]", file_name)
  30. short_file_name = ""
  31. for name in name_list:
  32. if name[:14] == "google_message":
  33. short_file_name = name
  34. return short_file_name
  35. __results = []
  36. # CPP results example:
  37. # [
  38. # "benchmarks": [
  39. # {
  40. # "bytes_per_second": int,
  41. # "cpu_time_ns": double,
  42. # "iterations": int,
  43. # "name: string,
  44. # "real_time_ns: double,
  45. # ...
  46. # },
  47. # ...
  48. # ],
  49. # ...
  50. # ]
  51. def __parse_cpp_result(filename):
  52. if filename == "":
  53. return
  54. if filename[0] != '/':
  55. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  56. with open(filename, encoding="utf-8") as f:
  57. results = json.loads(f.read())
  58. for benchmark in results["benchmarks"]:
  59. data_filename = "".join(
  60. re.split("(_parse_|_serialize)", benchmark["name"])[0])
  61. behavior = benchmark["name"][len(data_filename) + 1:]
  62. if data_filename[:2] == "BM":
  63. data_filename = data_filename[3:]
  64. __results.append({
  65. "language": "cpp",
  66. "dataFilename": data_filename,
  67. "behavior": behavior,
  68. "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
  69. })
  70. # Synthetic benchmark results example:
  71. # [
  72. # "benchmarks": [
  73. # {
  74. # "cpu_time_ns": double,
  75. # "iterations": int,
  76. # "name: string,
  77. # "real_time_ns: double,
  78. # ...
  79. # },
  80. # ...
  81. # ],
  82. # ...
  83. # ]
  84. def __parse_synthetic_result(filename):
  85. if filename == "":
  86. return
  87. if filename[0] != "/":
  88. filename = os.path.dirname(os.path.abspath(__file__)) + "/" + filename
  89. with open(filename, encoding="utf-8") as f:
  90. results = json.loads(f.read())
  91. for benchmark in results["benchmarks"]:
  92. __results.append({
  93. "language": "cpp",
  94. "dataFilename": "",
  95. "behavior": "synthetic",
  96. "throughput": 10.0**9 / benchmark["cpu_time_ns"]
  97. })
  98. # Python results example:
  99. # [
  100. # [
  101. # {
  102. # "filename": string,
  103. # "benchmarks": {
  104. # behavior: results,
  105. # ...
  106. # },
  107. # },
  108. # ...
  109. # ], #pure-python
  110. # ...
  111. # ]
  112. def __parse_python_result(filename):
  113. if filename == "":
  114. return
  115. if filename[0] != '/':
  116. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  117. with open(filename, encoding="utf-8") as f:
  118. results_list = json.loads(f.read())
  119. for results in results_list:
  120. for result in results:
  121. _, avg_size = __get_data_size(result["filename"])
  122. for behavior in result["benchmarks"]:
  123. __results.append({
  124. "language": "python",
  125. "dataFilename": __extract_file_name(result["filename"]),
  126. "behavior": behavior,
  127. "throughput": result["benchmarks"][behavior]
  128. })
  129. # Java results example:
  130. # [
  131. # {
  132. # "id": string,
  133. # "instrumentSpec": {...},
  134. # "measurements": [
  135. # {
  136. # "weight": float,
  137. # "value": {
  138. # "magnitude": float,
  139. # "unit": string
  140. # },
  141. # ...
  142. # },
  143. # ...
  144. # ],
  145. # "run": {...},
  146. # "scenario": {
  147. # "benchmarkSpec": {
  148. # "methodName": string,
  149. # "parameters": {
  150. # defined parameters in the benchmark: parameters value
  151. # },
  152. # ...
  153. # },
  154. # ...
  155. # }
  156. #
  157. # },
  158. # ...
  159. # ]
  160. def __parse_java_result(filename):
  161. if filename == "":
  162. return
  163. if filename[0] != '/':
  164. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  165. with open(filename, encoding="utf-8") as f:
  166. results = json.loads(f.read())
  167. for result in results:
  168. total_weight = 0
  169. total_value = 0
  170. for measurement in result["measurements"]:
  171. total_weight += measurement["weight"]
  172. total_value += measurement["value"]["magnitude"]
  173. avg_time = total_value * 1.0 / total_weight
  174. total_size, _ = __get_data_size(
  175. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  176. __results.append({
  177. "language": "java",
  178. "throughput": total_size / avg_time * 1e9 / 2 ** 20,
  179. "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
  180. "dataFilename": __extract_file_name(
  181. result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
  182. })
  183. # Go benchmark results:
  184. #
  185. # goos: linux
  186. # goarch: amd64
  187. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Unmarshal-12 3000 705784 ns/op
  188. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Marshal-12 2000 634648 ns/op
  189. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Size-12 5000 244174 ns/op
  190. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Clone-12 300 4120954 ns/op
  191. # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
  192. # PASS
  193. # ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
  194. def __parse_go_result(filename):
  195. if filename == "":
  196. return
  197. if filename[0] != '/':
  198. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  199. with open(filename, encoding="utf-8") as f:
  200. for line in f:
  201. result_list = re.split(r"[\ \t]+", line)
  202. if result_list[0][:9] != "Benchmark":
  203. continue
  204. first_slash_index = result_list[0].find('/')
  205. last_slash_index = result_list[0].rfind('/')
  206. full_filename = result_list[0][first_slash_index+1:last_slash_index]
  207. total_bytes, _ = __get_data_size(full_filename)
  208. behavior_with_suffix = result_list[0][last_slash_index+1:]
  209. last_dash = behavior_with_suffix.rfind("-")
  210. if last_dash == -1:
  211. behavior = behavior_with_suffix
  212. else:
  213. behavior = behavior_with_suffix[:last_dash]
  214. __results.append({
  215. "dataFilename": __extract_file_name(full_filename),
  216. "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
  217. "behavior": behavior,
  218. "language": "go"
  219. })
  220. # Self built json results example:
  221. #
  222. # [
  223. # {
  224. # "filename": string,
  225. # "benchmarks": {
  226. # behavior: results,
  227. # ...
  228. # },
  229. # },
  230. # ...
  231. # ]
  232. def __parse_custom_result(filename, language):
  233. if filename == "":
  234. return
  235. if filename[0] != '/':
  236. filename = os.path.dirname(os.path.abspath(__file__)) + '/' + filename
  237. with open(filename, encoding="utf-8") as f:
  238. results = json.loads(f.read())
  239. for result in results:
  240. _, avg_size = __get_data_size(result["filename"])
  241. for behavior in result["benchmarks"]:
  242. __results.append({
  243. "language": language,
  244. "dataFilename": __extract_file_name(result["filename"]),
  245. "behavior": behavior,
  246. "throughput": result["benchmarks"][behavior]
  247. })
  248. def __parse_js_result(filename, language):
  249. return __parse_custom_result(filename, language)
  250. def __parse_php_result(filename, language):
  251. return __parse_custom_result(filename, language)
  252. def get_result_from_file(cpp_file="",
  253. java_file="",
  254. python_file="",
  255. go_file="",
  256. synthetic_file="",
  257. node_file="",
  258. php_c_file="",
  259. php_file=""):
  260. results = {}
  261. if cpp_file != "":
  262. __parse_cpp_result(cpp_file)
  263. if java_file != "":
  264. __parse_java_result(java_file)
  265. if python_file != "":
  266. __parse_python_result(python_file)
  267. if go_file != "":
  268. __parse_go_result(go_file)
  269. if synthetic_file != "":
  270. __parse_synthetic_result(synthetic_file)
  271. if node_file != "":
  272. __parse_js_result(node_file, "node")
  273. if php_file != "":
  274. __parse_php_result(php_file, "php")
  275. if php_c_file != "":
  276. __parse_php_result(php_c_file, "php")
  277. return __results
  278. if __name__ == "__main__":
  279. parser = argparse.ArgumentParser()
  280. parser.add_argument(
  281. "-cpp",
  282. "--cpp_input_file",
  283. help="The CPP benchmark result file's name",
  284. default="")
  285. parser.add_argument(
  286. "-java",
  287. "--java_input_file",
  288. help="The Java benchmark result file's name",
  289. default="")
  290. parser.add_argument(
  291. "-python",
  292. "--python_input_file",
  293. help="The Python benchmark result file's name",
  294. default="")
  295. parser.add_argument(
  296. "-go",
  297. "--go_input_file",
  298. help="The golang benchmark result file's name",
  299. default="")
  300. parser.add_argument(
  301. "-node",
  302. "--node_input_file",
  303. help="The node.js benchmark result file's name",
  304. default="")
  305. parser.add_argument(
  306. "-php",
  307. "--php_input_file",
  308. help="The pure php benchmark result file's name",
  309. default="")
  310. parser.add_argument(
  311. "-php_c",
  312. "--php_c_input_file",
  313. help="The php with c ext benchmark result file's name",
  314. default="")
  315. args = parser.parse_args()
  316. results = get_result_from_file(
  317. cpp_file=args.cpp_input_file,
  318. java_file=args.java_input_file,
  319. python_file=args.python_input_file,
  320. go_file=args.go_input_file,
  321. node_file=args.node_input_file,
  322. php_file=args.php_input_file,
  323. php_c_file=args.php_c_input_file,
  324. )
  325. print(json.dumps(results, indent=2))