#!/usr/bin/env python3

# Copyright 2017 gRPC authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  15. from __future__ import print_function
  16. import collections
  17. import ctypes
  18. import json
  19. import math
  20. import sys
  21. import yaml
  22. with open('src/core/lib/debug/stats_data.yaml') as f:
  23. attrs = yaml.load(f.read())
  24. REQUIRED_FIELDS = ['name', 'doc']
  25. def make_type(name, fields):
  26. return (collections.namedtuple(
  27. name, ' '.join(list(set(REQUIRED_FIELDS + fields)))), [])
  28. def c_str(s, encoding='ascii'):
  29. if isinstance(s, str):
  30. s = s.encode(encoding)
  31. result = ''
  32. for c in s:
  33. c = chr(c) if isinstance(c, int) else c
  34. if not (32 <= ord(c) < 127) or c in ('\\', '"'):
  35. result += '\\%03o' % ord(c)
  36. else:
  37. result += c
  38. return '"' + result + '"'
  39. types = (
  40. make_type('Counter', []),
  41. make_type('Histogram', ['max', 'buckets']),
  42. )
  43. inst_map = dict((t[0].__name__, t[1]) for t in types)
  44. stats = []
  45. for attr in attrs:
  46. found = False
  47. for t, lst in types:
  48. t_name = t.__name__.lower()
  49. if t_name in attr:
  50. name = attr[t_name]
  51. del attr[t_name]
  52. lst.append(t(name=name, **attr))
  53. found = True
  54. break
  55. assert found, "Bad decl: %s" % attr
  56. def dbl2u64(d):
  57. return ctypes.c_ulonglong.from_buffer(ctypes.c_double(d)).value
  58. def shift_works_until(mapped_bounds, shift_bits):
  59. for i, ab in enumerate(zip(mapped_bounds, mapped_bounds[1:])):
  60. a, b = ab
  61. if (a >> shift_bits) == (b >> shift_bits):
  62. return i
  63. return len(mapped_bounds)
  64. def find_ideal_shift(mapped_bounds, max_size):
  65. best = None
  66. for shift_bits in reversed(list(range(0, 64))):
  67. n = shift_works_until(mapped_bounds, shift_bits)
  68. if n == 0:
  69. continue
  70. table_size = mapped_bounds[n - 1] >> shift_bits
  71. if table_size > max_size:
  72. continue
  73. if table_size > 65535:
  74. continue
  75. if best is None:
  76. best = (shift_bits, n, table_size)
  77. elif best[1] < n:
  78. best = (shift_bits, n, table_size)
  79. print(best)
  80. return best
  81. def gen_map_table(mapped_bounds, shift_data):
  82. tbl = []
  83. cur = 0
  84. print(mapped_bounds)
  85. mapped_bounds = [x >> shift_data[0] for x in mapped_bounds]
  86. print(mapped_bounds)
  87. for i in range(0, mapped_bounds[shift_data[1] - 1]):
  88. while i > mapped_bounds[cur]:
  89. cur += 1
  90. tbl.append(cur)
  91. return tbl
  92. static_tables = []
  93. def decl_static_table(values, type):
  94. global static_tables
  95. v = (type, values)
  96. for i, vp in enumerate(static_tables):
  97. if v == vp:
  98. return i
  99. print("ADD TABLE: %s %r" % (type, values))
  100. r = len(static_tables)
  101. static_tables.append(v)
  102. return r
  103. def type_for_uint_table(table):
  104. mv = max(table)
  105. if mv < 2**8:
  106. return 'uint8_t'
  107. elif mv < 2**16:
  108. return 'uint16_t'
  109. elif mv < 2**32:
  110. return 'uint32_t'
  111. else:
  112. return 'uint64_t'
  113. def gen_bucket_code(histogram):
  114. bounds = [0, 1]
  115. done_trivial = False
  116. done_unmapped = False
  117. first_nontrivial = None
  118. first_unmapped = None
  119. while len(bounds) < histogram.buckets + 1:
  120. if len(bounds) == histogram.buckets:
  121. nextb = int(histogram.max)
  122. else:
  123. mul = math.pow(
  124. float(histogram.max) / bounds[-1],
  125. 1.0 / (histogram.buckets + 1 - len(bounds)))
  126. nextb = int(math.ceil(bounds[-1] * mul))
  127. if nextb <= bounds[-1] + 1:
  128. nextb = bounds[-1] + 1
  129. elif not done_trivial:
  130. done_trivial = True
  131. first_nontrivial = len(bounds)
  132. bounds.append(nextb)
  133. bounds_idx = decl_static_table(bounds, 'int')
  134. if done_trivial:
  135. first_nontrivial_code = dbl2u64(first_nontrivial)
  136. code_bounds = [dbl2u64(x) - first_nontrivial_code for x in bounds]
  137. shift_data = find_ideal_shift(code_bounds[first_nontrivial:],
  138. 256 * histogram.buckets)
  139. #print first_nontrivial, shift_data, bounds
  140. #if shift_data is not None: print [hex(x >> shift_data[0]) for x in code_bounds[first_nontrivial:]]
  141. code = 'value = grpc_core::Clamp(value, 0, %d);\n' % histogram.max
  142. map_table = gen_map_table(code_bounds[first_nontrivial:], shift_data)
  143. if first_nontrivial is None:
  144. code += ('GRPC_STATS_INC_HISTOGRAM(GRPC_STATS_HISTOGRAM_%s, value);\n' %
  145. histogram.name.upper())
  146. else:
  147. code += 'if (value < %d) {\n' % first_nontrivial
  148. code += ('GRPC_STATS_INC_HISTOGRAM(GRPC_STATS_HISTOGRAM_%s, value);\n' %
  149. histogram.name.upper())
  150. code += 'return;\n'
  151. code += '}'
  152. first_nontrivial_code = dbl2u64(first_nontrivial)
  153. if shift_data is not None:
  154. map_table_idx = decl_static_table(map_table,
  155. type_for_uint_table(map_table))
  156. code += 'union { double dbl; uint64_t uint; } _val, _bkt;\n'
  157. code += '_val.dbl = value;\n'
  158. code += 'if (_val.uint < %dull) {\n' % (
  159. (map_table[-1] << shift_data[0]) + first_nontrivial_code)
  160. code += 'int bucket = '
  161. code += 'grpc_stats_table_%d[((_val.uint - %dull) >> %d)] + %d;\n' % (
  162. map_table_idx, first_nontrivial_code, shift_data[0],
  163. first_nontrivial)
  164. code += '_bkt.dbl = grpc_stats_table_%d[bucket];\n' % bounds_idx
  165. code += 'bucket -= (_val.uint < _bkt.uint);\n'
  166. code += 'GRPC_STATS_INC_HISTOGRAM(GRPC_STATS_HISTOGRAM_%s, bucket);\n' % histogram.name.upper(
  167. )
  168. code += 'return;\n'
  169. code += '}\n'
  170. code += 'GRPC_STATS_INC_HISTOGRAM(GRPC_STATS_HISTOGRAM_%s, ' % histogram.name.upper(
  171. )
  172. code += 'grpc_stats_histo_find_bucket_slow(value, grpc_stats_table_%d, %d));\n' % (
  173. bounds_idx, histogram.buckets)
  174. return (code, bounds_idx)
  175. # utility: print a big comment block into a set of files
  176. def put_banner(files, banner):
  177. for f in files:
  178. print('/*', file=f)
  179. for line in banner:
  180. print(' * %s' % line, file=f)
  181. print(' */', file=f)
  182. print(file=f)
  183. with open('src/core/lib/debug/stats_data.h', 'w') as H:
  184. # copy-paste copyright notice from this file
  185. with open(sys.argv[0]) as my_source:
  186. copyright = []
  187. for line in my_source:
  188. if line[0] != '#':
  189. break
  190. for line in my_source:
  191. if line[0] == '#':
  192. copyright.append(line)
  193. break
  194. for line in my_source:
  195. if line[0] != '#':
  196. break
  197. copyright.append(line)
  198. put_banner([H], [line[2:].rstrip() for line in copyright])
  199. put_banner(
  200. [H],
  201. ["Automatically generated by tools/codegen/core/gen_stats_data.py"])
  202. print("#ifndef GRPC_CORE_LIB_DEBUG_STATS_DATA_H", file=H)
  203. print("#define GRPC_CORE_LIB_DEBUG_STATS_DATA_H", file=H)
  204. print(file=H)
  205. print("#include <grpc/support/port_platform.h>", file=H)
  206. print(file=H)
  207. print("#include <inttypes.h>", file=H)
  208. print("#include \"src/core/lib/iomgr/exec_ctx.h\"", file=H)
  209. print(file=H)
  210. for typename, instances in sorted(inst_map.items()):
  211. print("typedef enum {", file=H)
  212. for inst in instances:
  213. print(" GRPC_STATS_%s_%s," % (typename.upper(), inst.name.upper()),
  214. file=H)
  215. print(" GRPC_STATS_%s_COUNT" % (typename.upper()), file=H)
  216. print("} grpc_stats_%ss;" % (typename.lower()), file=H)
  217. print("extern const char *grpc_stats_%s_name[GRPC_STATS_%s_COUNT];" %
  218. (typename.lower(), typename.upper()),
  219. file=H)
  220. print("extern const char *grpc_stats_%s_doc[GRPC_STATS_%s_COUNT];" %
  221. (typename.lower(), typename.upper()),
  222. file=H)
  223. histo_start = []
  224. histo_buckets = []
  225. histo_bucket_boundaries = []
  226. print("typedef enum {", file=H)
  227. first_slot = 0
  228. for histogram in inst_map['Histogram']:
  229. histo_start.append(first_slot)
  230. histo_buckets.append(histogram.buckets)
  231. print(" GRPC_STATS_HISTOGRAM_%s_FIRST_SLOT = %d," %
  232. (histogram.name.upper(), first_slot),
  233. file=H)
  234. print(" GRPC_STATS_HISTOGRAM_%s_BUCKETS = %d," %
  235. (histogram.name.upper(), histogram.buckets),
  236. file=H)
  237. first_slot += histogram.buckets
  238. print(" GRPC_STATS_HISTOGRAM_BUCKETS = %d" % first_slot, file=H)
  239. print("} grpc_stats_histogram_constants;", file=H)
  240. print("#if defined(GRPC_COLLECT_STATS) || !defined(NDEBUG)", file=H)
  241. for ctr in inst_map['Counter']:
  242. print(("#define GRPC_STATS_INC_%s() " +
  243. "GRPC_STATS_INC_COUNTER(GRPC_STATS_COUNTER_%s)") %
  244. (ctr.name.upper(), ctr.name.upper()),
  245. file=H)
  246. for histogram in inst_map['Histogram']:
  247. print(
  248. "#define GRPC_STATS_INC_%s(value) grpc_stats_inc_%s( (int)(value))"
  249. % (histogram.name.upper(), histogram.name.lower()),
  250. file=H)
  251. print("void grpc_stats_inc_%s(int x);" % histogram.name.lower(), file=H)
  252. print("#else", file=H)
  253. for ctr in inst_map['Counter']:
  254. print(("#define GRPC_STATS_INC_%s() ") % (ctr.name.upper()), file=H)
  255. for histogram in inst_map['Histogram']:
  256. print("#define GRPC_STATS_INC_%s(value)" % (histogram.name.upper()),
  257. file=H)
  258. print("#endif /* defined(GRPC_COLLECT_STATS) || !defined(NDEBUG) */",
  259. file=H)
  260. for i, tbl in enumerate(static_tables):
  261. print("extern const %s grpc_stats_table_%d[%d];" %
  262. (tbl[0], i, len(tbl[1])),
  263. file=H)
  264. print("extern const int grpc_stats_histo_buckets[%d];" %
  265. len(inst_map['Histogram']),
  266. file=H)
  267. print("extern const int grpc_stats_histo_start[%d];" %
  268. len(inst_map['Histogram']),
  269. file=H)
  270. print("extern const int *const grpc_stats_histo_bucket_boundaries[%d];" %
  271. len(inst_map['Histogram']),
  272. file=H)
  273. print("extern void (*const grpc_stats_inc_histogram[%d])(int x);" %
  274. len(inst_map['Histogram']),
  275. file=H)
  276. print(file=H)
  277. print("#endif /* GRPC_CORE_LIB_DEBUG_STATS_DATA_H */", file=H)
  278. with open('src/core/lib/debug/stats_data.cc', 'w') as C:
  279. # copy-paste copyright notice from this file
  280. with open(sys.argv[0]) as my_source:
  281. copyright = []
  282. for line in my_source:
  283. if line[0] != '#':
  284. break
  285. for line in my_source:
  286. if line[0] == '#':
  287. copyright.append(line)
  288. break
  289. for line in my_source:
  290. if line[0] != '#':
  291. break
  292. copyright.append(line)
  293. put_banner([C], [line[2:].rstrip() for line in copyright])
  294. put_banner(
  295. [C],
  296. ["Automatically generated by tools/codegen/core/gen_stats_data.py"])
  297. print("#include <grpc/support/port_platform.h>", file=C)
  298. print(file=C)
  299. print("#include \"src/core/lib/debug/stats.h\"", file=C)
  300. print("#include \"src/core/lib/debug/stats_data.h\"", file=C)
  301. print("#include \"src/core/lib/gpr/useful.h\"", file=C)
  302. print("#include \"src/core/lib/iomgr/exec_ctx.h\"", file=C)
  303. print(file=C)
  304. histo_code = []
  305. for histogram in inst_map['Histogram']:
  306. code, bounds_idx = gen_bucket_code(histogram)
  307. histo_bucket_boundaries.append(bounds_idx)
  308. histo_code.append(code)
  309. for typename, instances in sorted(inst_map.items()):
  310. print("const char *grpc_stats_%s_name[GRPC_STATS_%s_COUNT] = {" %
  311. (typename.lower(), typename.upper()),
  312. file=C)
  313. for inst in instances:
  314. print(" %s," % c_str(inst.name), file=C)
  315. print("};", file=C)
  316. print("const char *grpc_stats_%s_doc[GRPC_STATS_%s_COUNT] = {" %
  317. (typename.lower(), typename.upper()),
  318. file=C)
  319. for inst in instances:
  320. print(" %s," % c_str(inst.doc), file=C)
  321. print("};", file=C)
  322. for i, tbl in enumerate(static_tables):
  323. print("const %s grpc_stats_table_%d[%d] = {%s};" %
  324. (tbl[0], i, len(tbl[1]), ','.join('%s' % x for x in tbl[1])),
  325. file=C)
  326. for histogram, code in zip(inst_map['Histogram'], histo_code):
  327. print(("void grpc_stats_inc_%s(int value) {%s}") %
  328. (histogram.name.lower(), code),
  329. file=C)
  330. print(
  331. "const int grpc_stats_histo_buckets[%d] = {%s};" %
  332. (len(inst_map['Histogram']), ','.join('%s' % x for x in histo_buckets)),
  333. file=C)
  334. print("const int grpc_stats_histo_start[%d] = {%s};" %
  335. (len(inst_map['Histogram']), ','.join('%s' % x for x in histo_start)),
  336. file=C)
  337. print("const int *const grpc_stats_histo_bucket_boundaries[%d] = {%s};" %
  338. (len(inst_map['Histogram']), ','.join(
  339. 'grpc_stats_table_%d' % x for x in histo_bucket_boundaries)),
  340. file=C)
  341. print("void (*const grpc_stats_inc_histogram[%d])(int x) = {%s};" %
  342. (len(inst_map['Histogram']), ','.join(
  343. 'grpc_stats_inc_%s' % histogram.name.lower()
  344. for histogram in inst_map['Histogram'])),
  345. file=C)
  346. # patch qps_test bigquery schema
  347. RECORD_EXPLICIT_PERCENTILES = [50, 95, 99]
  348. with open('tools/run_tests/performance/scenario_result_schema.json', 'r') as f:
  349. qps_schema = json.loads(f.read())
  350. def FindNamed(js, name):
  351. for el in js:
  352. if el['name'] == name:
  353. return el
  354. def RemoveCoreFields(js):
  355. new_fields = []
  356. for field in js['fields']:
  357. if not field['name'].startswith('core_'):
  358. new_fields.append(field)
  359. js['fields'] = new_fields
  360. RemoveCoreFields(FindNamed(qps_schema, 'clientStats'))
  361. RemoveCoreFields(FindNamed(qps_schema, 'serverStats'))
  362. def AddCoreFields(js):
  363. for counter in inst_map['Counter']:
  364. js['fields'].append({
  365. 'name': 'core_%s' % counter.name,
  366. 'type': 'INTEGER',
  367. 'mode': 'NULLABLE'
  368. })
  369. for histogram in inst_map['Histogram']:
  370. js['fields'].append({
  371. 'name': 'core_%s' % histogram.name,
  372. 'type': 'STRING',
  373. 'mode': 'NULLABLE'
  374. })
  375. js['fields'].append({
  376. 'name': 'core_%s_bkts' % histogram.name,
  377. 'type': 'STRING',
  378. 'mode': 'NULLABLE'
  379. })
  380. for pctl in RECORD_EXPLICIT_PERCENTILES:
  381. js['fields'].append({
  382. 'name': 'core_%s_%dp' % (histogram.name, pctl),
  383. 'type': 'FLOAT',
  384. 'mode': 'NULLABLE'
  385. })
  386. AddCoreFields(FindNamed(qps_schema, 'clientStats'))
  387. AddCoreFields(FindNamed(qps_schema, 'serverStats'))
  388. with open('tools/run_tests/performance/scenario_result_schema.json', 'w') as f:
  389. f.write(json.dumps(qps_schema, indent=2, sort_keys=True))
  390. # and generate a helper script to massage scenario results into the format we'd
  391. # like to query
  392. with open('tools/run_tests/performance/massage_qps_stats.py', 'w') as P:
  393. with open(sys.argv[0]) as my_source:
  394. for line in my_source:
  395. if line[0] != '#':
  396. break
  397. for line in my_source:
  398. if line[0] == '#':
  399. print(line.rstrip(), file=P)
  400. break
  401. for line in my_source:
  402. if line[0] != '#':
  403. break
  404. print(line.rstrip(), file=P)
  405. print(file=P)
  406. print('# Autogenerated by tools/codegen/core/gen_stats_data.py', file=P)
  407. print(file=P)
  408. print('import massage_qps_stats_helpers', file=P)
  409. print('def massage_qps_stats(scenario_result):', file=P)
  410. print(
  411. ' for stats in scenario_result["serverStats"] + scenario_result["clientStats"]:',
  412. file=P)
  413. print(' if "coreStats" in stats:', file=P)
  414. print(
  415. ' # Get rid of the "coreStats" element and replace it by statistics',
  416. file=P)
  417. print(' # that correspond to columns in the bigquery schema.', file=P)
  418. print(' core_stats = stats["coreStats"]', file=P)
  419. print(' del stats["coreStats"]', file=P)
  420. for counter in inst_map['Counter']:
  421. print(
  422. ' stats["core_%s"] = massage_qps_stats_helpers.counter(core_stats, "%s")'
  423. % (counter.name, counter.name),
  424. file=P)
  425. for i, histogram in enumerate(inst_map['Histogram']):
  426. print(
  427. ' h = massage_qps_stats_helpers.histogram(core_stats, "%s")' %
  428. histogram.name,
  429. file=P)
  430. print(
  431. ' stats["core_%s"] = ",".join("%%f" %% x for x in h.buckets)' %
  432. histogram.name,
  433. file=P)
  434. print(
  435. ' stats["core_%s_bkts"] = ",".join("%%f" %% x for x in h.boundaries)'
  436. % histogram.name,
  437. file=P)
  438. for pctl in RECORD_EXPLICIT_PERCENTILES:
  439. print(
  440. ' stats["core_%s_%dp"] = massage_qps_stats_helpers.percentile(h.buckets, %d, h.boundaries)'
  441. % (histogram.name, pctl, pctl),
  442. file=P)
  443. with open('src/core/lib/debug/stats_data_bq_schema.sql', 'w') as S:
  444. columns = []
  445. for counter in inst_map['Counter']:
  446. columns.append(('%s_per_iteration' % counter.name, 'FLOAT'))
  447. print(',\n'.join('%s:%s' % x for x in columns), file=S)