report.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054
  1. """report.py - Utilities for reporting statistics about benchmark results
  2. """
  3. import unittest
  4. import os
  5. import re
  6. import copy
  7. import random
  8. from scipy.stats import mannwhitneyu
  9. class BenchmarkColor(object):
  10. def __init__(self, name, code):
  11. self.name = name
  12. self.code = code
  13. def __repr__(self):
  14. return '%s%r' % (self.__class__.__name__,
  15. (self.name, self.code))
  16. def __format__(self, format):
  17. return self.code
# Benchmark Colors Enumeration
BC_NONE = BenchmarkColor('NONE', '')  # No-op color used when color is disabled.
BC_MAGENTA = BenchmarkColor('MAGENTA', '\033[95m')
BC_CYAN = BenchmarkColor('CYAN', '\033[96m')
BC_OKBLUE = BenchmarkColor('OKBLUE', '\033[94m')
BC_OKGREEN = BenchmarkColor('OKGREEN', '\033[32m')
# NOTE(review): HEADER uses '\033[92m' (bright green) while OKGREEN uses
# '\033[32m' (plain green) -- looks intentional, but worth confirming.
BC_HEADER = BenchmarkColor('HEADER', '\033[92m')
BC_WARNING = BenchmarkColor('WARNING', '\033[93m')
BC_WHITE = BenchmarkColor('WHITE', '\033[97m')
BC_FAIL = BenchmarkColor('FAIL', '\033[91m')
BC_ENDC = BenchmarkColor('ENDC', '\033[0m')  # Reset attributes to default.
BC_BOLD = BenchmarkColor('BOLD', '\033[1m')
BC_UNDERLINE = BenchmarkColor('UNDERLINE', '\033[4m')

# Mann-Whitney U test configuration.
UTEST_MIN_REPETITIONS = 2  # Below this, no U test is attempted at all.
UTEST_OPTIMAL_REPETITIONS = 9  # Lowest reasonable number, More is better.
UTEST_COL_NAME = "_pvalue"  # Suffix appended to the benchmark name in U test rows.
  34. def color_format(use_color, fmt_str, *args, **kwargs):
  35. """
  36. Return the result of 'fmt_str.format(*args, **kwargs)' after transforming
  37. 'args' and 'kwargs' according to the value of 'use_color'. If 'use_color'
  38. is False then all color codes in 'args' and 'kwargs' are replaced with
  39. the empty string.
  40. """
  41. assert use_color is True or use_color is False
  42. if not use_color:
  43. args = [arg if not isinstance(arg, BenchmarkColor) else BC_NONE
  44. for arg in args]
  45. kwargs = {key: arg if not isinstance(arg, BenchmarkColor) else BC_NONE
  46. for key, arg in kwargs.items()}
  47. return fmt_str.format(*args, **kwargs)
  48. def find_longest_name(benchmark_list):
  49. """
  50. Return the length of the longest benchmark name in a given list of
  51. benchmark JSON objects
  52. """
  53. longest_name = 1
  54. for bc in benchmark_list:
  55. if len(bc['name']) > longest_name:
  56. longest_name = len(bc['name'])
  57. return longest_name
  58. def calculate_change(old_val, new_val):
  59. """
  60. Return a float representing the decimal change between old_val and new_val.
  61. """
  62. if old_val == 0 and new_val == 0:
  63. return 0.0
  64. if old_val == 0:
  65. return float(new_val - old_val) / (float(old_val + new_val) / 2)
  66. return float(new_val - old_val) / abs(old_val)
  67. def filter_benchmark(json_orig, family, replacement=""):
  68. """
  69. Apply a filter to the json, and only leave the 'family' of benchmarks.
  70. """
  71. regex = re.compile(family)
  72. filtered = {}
  73. filtered['benchmarks'] = []
  74. for be in json_orig['benchmarks']:
  75. if not regex.search(be['name']):
  76. continue
  77. filteredbench = copy.deepcopy(be) # Do NOT modify the old name!
  78. filteredbench['name'] = regex.sub(replacement, filteredbench['name'])
  79. filtered['benchmarks'].append(filteredbench)
  80. return filtered
  81. def get_unique_benchmark_names(json):
  82. """
  83. While *keeping* the order, give all the unique 'names' used for benchmarks.
  84. """
  85. seen = set()
  86. uniqued = [x['name'] for x in json['benchmarks']
  87. if x['name'] not in seen and
  88. (seen.add(x['name']) or True)]
  89. return uniqued
  90. def intersect(list1, list2):
  91. """
  92. Given two lists, get a new list consisting of the elements only contained
  93. in *both of the input lists*, while preserving the ordering.
  94. """
  95. return [x for x in list1 if x in list2]
  96. def is_potentially_comparable_benchmark(x):
  97. return ('time_unit' in x and 'real_time' in x and 'cpu_time' in x)
  98. def partition_benchmarks(json1, json2):
  99. """
  100. While preserving the ordering, find benchmarks with the same names in
  101. both of the inputs, and group them.
  102. (i.e. partition/filter into groups with common name)
  103. """
  104. json1_unique_names = get_unique_benchmark_names(json1)
  105. json2_unique_names = get_unique_benchmark_names(json2)
  106. names = intersect(json1_unique_names, json2_unique_names)
  107. partitions = []
  108. for name in names:
  109. time_unit = None
  110. # Pick the time unit from the first entry of the lhs benchmark.
  111. # We should be careful not to crash with unexpected input.
  112. for x in json1['benchmarks']:
  113. if (x['name'] == name and is_potentially_comparable_benchmark(x)):
  114. time_unit = x['time_unit']
  115. break
  116. if time_unit is None:
  117. continue
  118. # Filter by name and time unit.
  119. # All the repetitions are assumed to be comparable.
  120. lhs = [x for x in json1['benchmarks'] if x['name'] == name and
  121. x['time_unit'] == time_unit]
  122. rhs = [x for x in json2['benchmarks'] if x['name'] == name and
  123. x['time_unit'] == time_unit]
  124. partitions.append([lhs, rhs])
  125. return partitions
  126. def extract_field(partition, field_name):
  127. # The count of elements may be different. We want *all* of them.
  128. lhs = [x[field_name] for x in partition[0]]
  129. rhs = [x[field_name] for x in partition[1]]
  130. return [lhs, rhs]
  131. def calc_utest(timings_cpu, timings_time):
  132. min_rep_cnt = min(len(timings_time[0]),
  133. len(timings_time[1]),
  134. len(timings_cpu[0]),
  135. len(timings_cpu[1]))
  136. # Does *everything* has at least UTEST_MIN_REPETITIONS repetitions?
  137. if min_rep_cnt < UTEST_MIN_REPETITIONS:
  138. return False, None, None
  139. time_pvalue = mannwhitneyu(
  140. timings_time[0], timings_time[1], alternative='two-sided').pvalue
  141. cpu_pvalue = mannwhitneyu(
  142. timings_cpu[0], timings_cpu[1], alternative='two-sided').pvalue
  143. return (min_rep_cnt >= UTEST_OPTIMAL_REPETITIONS), cpu_pvalue, time_pvalue
  144. def print_utest(bc_name, utest, utest_alpha, first_col_width, use_color=True):
  145. def get_utest_color(pval):
  146. return BC_FAIL if pval >= utest_alpha else BC_OKGREEN
  147. # Check if we failed miserably with minimum required repetitions for utest
  148. if not utest['have_optimal_repetitions'] and utest['cpu_pvalue'] is None and utest['time_pvalue'] is None:
  149. return []
  150. dsc = "U Test, Repetitions: {} vs {}".format(
  151. utest['nr_of_repetitions'], utest['nr_of_repetitions_other'])
  152. dsc_color = BC_OKGREEN
  153. # We still got some results to show but issue a warning about it.
  154. if not utest['have_optimal_repetitions']:
  155. dsc_color = BC_WARNING
  156. dsc += ". WARNING: Results unreliable! {}+ repetitions recommended.".format(
  157. UTEST_OPTIMAL_REPETITIONS)
  158. special_str = "{}{:<{}s}{endc}{}{:16.4f}{endc}{}{:16.4f}{endc}{} {}"
  159. return [color_format(use_color,
  160. special_str,
  161. BC_HEADER,
  162. "{}{}".format(bc_name, UTEST_COL_NAME),
  163. first_col_width,
  164. get_utest_color(
  165. utest['time_pvalue']), utest['time_pvalue'],
  166. get_utest_color(
  167. utest['cpu_pvalue']), utest['cpu_pvalue'],
  168. dsc_color, dsc,
  169. endc=BC_ENDC)]
  170. def get_difference_report(
  171. json1,
  172. json2,
  173. utest=False):
  174. """
  175. Calculate and report the difference between each test of two benchmarks
  176. runs specified as 'json1' and 'json2'. Output is another json containing
  177. relevant details for each test run.
  178. """
  179. assert utest is True or utest is False
  180. diff_report = []
  181. partitions = partition_benchmarks(json1, json2)
  182. for partition in partitions:
  183. benchmark_name = partition[0][0]['name']
  184. time_unit = partition[0][0]['time_unit']
  185. measurements = []
  186. utest_results = {}
  187. # Careful, we may have different repetition count.
  188. for i in range(min(len(partition[0]), len(partition[1]))):
  189. bn = partition[0][i]
  190. other_bench = partition[1][i]
  191. measurements.append({
  192. 'real_time': bn['real_time'],
  193. 'cpu_time': bn['cpu_time'],
  194. 'real_time_other': other_bench['real_time'],
  195. 'cpu_time_other': other_bench['cpu_time'],
  196. 'time': calculate_change(bn['real_time'], other_bench['real_time']),
  197. 'cpu': calculate_change(bn['cpu_time'], other_bench['cpu_time'])
  198. })
  199. # After processing the whole partition, if requested, do the U test.
  200. if utest:
  201. timings_cpu = extract_field(partition, 'cpu_time')
  202. timings_time = extract_field(partition, 'real_time')
  203. have_optimal_repetitions, cpu_pvalue, time_pvalue = calc_utest(timings_cpu, timings_time)
  204. if cpu_pvalue and time_pvalue:
  205. utest_results = {
  206. 'have_optimal_repetitions': have_optimal_repetitions,
  207. 'cpu_pvalue': cpu_pvalue,
  208. 'time_pvalue': time_pvalue,
  209. 'nr_of_repetitions': len(timings_cpu[0]),
  210. 'nr_of_repetitions_other': len(timings_cpu[1])
  211. }
  212. # Store only if we had any measurements for given benchmark.
  213. # E.g. partition_benchmarks will filter out the benchmarks having
  214. # time units which are not compatible with other time units in the
  215. # benchmark suite.
  216. if measurements:
  217. run_type = partition[0][0]['run_type'] if 'run_type' in partition[0][0] else ''
  218. aggregate_name = partition[0][0]['aggregate_name'] if run_type == 'aggregate' and 'aggregate_name' in partition[0][0] else ''
  219. diff_report.append({
  220. 'name': benchmark_name,
  221. 'measurements': measurements,
  222. 'time_unit': time_unit,
  223. 'run_type': run_type,
  224. 'aggregate_name': aggregate_name,
  225. 'utest': utest_results
  226. })
  227. return diff_report
  228. def print_difference_report(
  229. json_diff_report,
  230. include_aggregates_only=False,
  231. utest=False,
  232. utest_alpha=0.05,
  233. use_color=True):
  234. """
  235. Calculate and report the difference between each test of two benchmarks
  236. runs specified as 'json1' and 'json2'.
  237. """
  238. assert utest is True or utest is False
  239. def get_color(res):
  240. if res > 0.05:
  241. return BC_FAIL
  242. elif res > -0.07:
  243. return BC_WHITE
  244. else:
  245. return BC_CYAN
  246. first_col_width = find_longest_name(json_diff_report)
  247. first_col_width = max(
  248. first_col_width,
  249. len('Benchmark'))
  250. first_col_width += len(UTEST_COL_NAME)
  251. first_line = "{:<{}s}Time CPU Time Old Time New CPU Old CPU New".format(
  252. 'Benchmark', 12 + first_col_width)
  253. output_strs = [first_line, '-' * len(first_line)]
  254. fmt_str = "{}{:<{}s}{endc}{}{:+16.4f}{endc}{}{:+16.4f}{endc}{:14.0f}{:14.0f}{endc}{:14.0f}{:14.0f}"
  255. for benchmark in json_diff_report:
  256. # *If* we were asked to only include aggregates,
  257. # and if it is non-aggregate, then don't print it.
  258. if not include_aggregates_only or not 'run_type' in benchmark or benchmark['run_type'] == 'aggregate':
  259. for measurement in benchmark['measurements']:
  260. output_strs += [color_format(use_color,
  261. fmt_str,
  262. BC_HEADER,
  263. benchmark['name'],
  264. first_col_width,
  265. get_color(measurement['time']),
  266. measurement['time'],
  267. get_color(measurement['cpu']),
  268. measurement['cpu'],
  269. measurement['real_time'],
  270. measurement['real_time_other'],
  271. measurement['cpu_time'],
  272. measurement['cpu_time_other'],
  273. endc=BC_ENDC)]
  274. # After processing the measurements, if requested and
  275. # if applicable (e.g. u-test exists for given benchmark),
  276. # print the U test.
  277. if utest and benchmark['utest']:
  278. output_strs += print_utest(benchmark['name'],
  279. benchmark['utest'],
  280. utest_alpha=utest_alpha,
  281. first_col_width=first_col_width,
  282. use_color=use_color)
  283. return output_strs
  284. ###############################################################################
  285. # Unit tests
  286. class TestGetUniqueBenchmarkNames(unittest.TestCase):
  287. def load_results(self):
  288. import json
  289. testInputs = os.path.join(
  290. os.path.dirname(
  291. os.path.realpath(__file__)),
  292. 'Inputs')
  293. testOutput = os.path.join(testInputs, 'test3_run0.json')
  294. with open(testOutput, 'r') as f:
  295. json = json.load(f)
  296. return json
  297. def test_basic(self):
  298. expect_lines = [
  299. 'BM_One',
  300. 'BM_Two',
  301. 'short', # These two are not sorted
  302. 'medium', # These two are not sorted
  303. ]
  304. json = self.load_results()
  305. output_lines = get_unique_benchmark_names(json)
  306. print("\n")
  307. print("\n".join(output_lines))
  308. self.assertEqual(len(output_lines), len(expect_lines))
  309. for i in range(0, len(output_lines)):
  310. self.assertEqual(expect_lines[i], output_lines[i])
class TestReportDifference(unittest.TestCase):
    """Checks the diff report built from the test1 input pair, both as
    pretty-printed text and as the raw report structure."""

    @classmethod
    def setUpClass(cls):
        def load_results():
            # Load the two fixture runs shipped under Inputs/.
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test1_run1.json')
            testOutput2 = os.path.join(testInputs, 'test1_run2.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        # Computed once and shared by all test methods of this class.
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        # Expected whitespace-split columns: name, time change, cpu change,
        # time old/new, cpu old/new.
        expect_lines = [
            ['BM_SameTimes', '+0.0000', '+0.0000', '10', '10', '10', '10'],
            ['BM_2xFaster', '-0.5000', '-0.5000', '50', '25', '50', '25'],
            ['BM_2xSlower', '+1.0000', '+1.0000', '50', '100', '50', '100'],
            ['BM_1PercentFaster', '-0.0100', '-0.0100', '100', '99', '100', '99'],
            ['BM_1PercentSlower', '+0.0100', '+0.0100', '100', '101', '100', '101'],
            ['BM_10PercentFaster', '-0.1000', '-0.1000', '100', '90', '100', '90'],
            ['BM_10PercentSlower', '+0.1000', '+0.1000', '100', '110', '100', '110'],
            ['BM_100xSlower', '+99.0000', '+99.0000',
             '100', '10000', '100', '10000'],
            ['BM_100xFaster', '-0.9900', '-0.9900',
             '10000', '100', '10000', '100'],
            ['BM_10PercentCPUToTime', '+0.1000',
             '-0.1000', '100', '110', '100', '90'],
            ['BM_ThirdFaster', '-0.3333', '-0.3334', '100', '67', '100', '67'],
            ['BM_NotBadTimeUnit', '-0.9000', '+0.2000', '0', '0', '0', '1'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        # The first two lines are the column header and the separator rule.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_output(self):
        expected_output = [
            {
                'name': 'BM_SameTimes',
                'measurements': [{'time': 0.0000, 'cpu': 0.0000, 'real_time': 10, 'real_time_other': 10, 'cpu_time': 10, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xFaster',
                'measurements': [{'time': -0.5000, 'cpu': -0.5000, 'real_time': 50, 'real_time_other': 25, 'cpu_time': 50, 'cpu_time_other': 25}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_2xSlower',
                'measurements': [{'time': 1.0000, 'cpu': 1.0000, 'real_time': 50, 'real_time_other': 100, 'cpu_time': 50, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentFaster',
                'measurements': [{'time': -0.0100, 'cpu': -0.0100, 'real_time': 100, 'real_time_other': 98.9999999, 'cpu_time': 100, 'cpu_time_other': 98.9999999}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_1PercentSlower',
                'measurements': [{'time': 0.0100, 'cpu': 0.0100, 'real_time': 100, 'real_time_other': 101, 'cpu_time': 100, 'cpu_time_other': 101}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentFaster',
                'measurements': [{'time': -0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 90, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentSlower',
                'measurements': [{'time': 0.1000, 'cpu': 0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 110}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xSlower',
                'measurements': [{'time': 99.0000, 'cpu': 99.0000, 'real_time': 100, 'real_time_other': 10000, 'cpu_time': 100, 'cpu_time_other': 10000}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_100xFaster',
                'measurements': [{'time': -0.9900, 'cpu': -0.9900, 'real_time': 10000, 'real_time_other': 100, 'cpu_time': 10000, 'cpu_time_other': 100}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_10PercentCPUToTime',
                'measurements': [{'time': 0.1000, 'cpu': -0.1000, 'real_time': 100, 'real_time_other': 110, 'cpu_time': 100, 'cpu_time_other': 90}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_ThirdFaster',
                'measurements': [{'time': -0.3333, 'cpu': -0.3334, 'real_time': 100, 'real_time_other': 67, 'cpu_time': 100, 'cpu_time_other': 67}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': 'BM_NotBadTimeUnit',
                'measurements': [{'time': -0.9000, 'cpu': 0.2000, 'real_time': 0.4, 'real_time_other': 0.04, 'cpu_time': 0.5, 'cpu_time_other': 0.6}],
                'time_unit': 's',
                'utest': {}
            },
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest / assert_measurements are shared helpers defined
            # elsewhere in this module.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
class TestReportDifferenceBetweenFamilies(unittest.TestCase):
    """Checks diffing two *families* of benchmarks taken from one run, with
    both families renamed to a common form via filter_benchmark."""

    @classmethod
    def setUpClass(cls):
        def load_result():
            # Load the single fixture run shipped under Inputs/.
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput = os.path.join(testInputs, 'test2_run.json')
            with open(testOutput, 'r') as f:
                json = json.load(f)
            return json

        json = load_result()
        # Rewrite both family names to '.' so the entries pair up by name.
        json1 = filter_benchmark(json, "BM_Z.ro", ".")
        json2 = filter_benchmark(json, "BM_O.e", ".")
        cls.json_diff_report = get_difference_report(json1, json2)

    def test_json_diff_report_pretty_printing(self):
        # Columns: name, time change, cpu change, time old/new, cpu old/new.
        expect_lines = [
            ['.', '-0.5000', '-0.5000', '10', '5', '10', '5'],
            ['./4', '-0.5000', '-0.5000', '40', '20', '40', '20'],
            ['Prefix/.', '-0.5000', '-0.5000', '20', '10', '20', '10'],
            ['Prefix/./3', '-0.5000', '-0.5000', '30', '15', '30', '15'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, use_color=False)
        # The first two lines are the column header and the separator rule.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(len(parts), 7)
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 10, 'real_time_other': 5, 'cpu_time': 10, 'cpu_time_other': 5}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'./4',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 40, 'real_time_other': 20, 'cpu_time': 40, 'cpu_time_other': 20}],
                'time_unit': 'ns',
                'utest': {},
            },
            {
                'name': u'Prefix/.',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 20, 'real_time_other': 10, 'cpu_time': 20, 'cpu_time_other': 10}],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'Prefix/./3',
                'measurements': [{'time': -0.5, 'cpu': -0.5, 'real_time': 30, 'real_time_other': 15, 'cpu_time': 30, 'cpu_time_other': 15}],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest / assert_measurements are shared helpers defined
            # elsewhere in this module.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
class TestReportDifferenceWithUTest(unittest.TestCase):
    """Checks diff reports with U-test rows enabled, including the
    aggregates-only pretty-printing mode."""

    @classmethod
    def setUpClass(cls):
        def load_results():
            # Load the two fixture runs shipped under Inputs/.
            import json
            testInputs = os.path.join(
                os.path.dirname(
                    os.path.realpath(__file__)),
                'Inputs')
            testOutput1 = os.path.join(testInputs, 'test3_run0.json')
            testOutput2 = os.path.join(testInputs, 'test3_run1.json')
            with open(testOutput1, 'r') as f:
                json1 = json.load(f)
            with open(testOutput2, 'r') as f:
                json2 = json.load(f)
            return json1, json2

        json1, json2 = load_results()
        # Computed once with U tests enabled; shared by all test methods.
        cls.json_diff_report = get_difference_report(
            json1, json2, utest=True)

    def test_json_diff_report_pretty_printing(self):
        # Measurement rows have 7 columns; '*_pvalue' rows carry the U-test
        # description text and therefore split into more parts.
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
            ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, utest=True, utest_alpha=0.05, use_color=False)
        # The first two lines are the column header and the separator rule.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report_pretty_printing_aggregates_only(self):
        # Non-aggregate measurement rows are suppressed, but U-test rows
        # are still printed for their benchmarks.
        expect_lines = [
            ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
            ['BM_Two_pvalue',
             '1.0000',
             '0.6667',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '2.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
            ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
            ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
            ['short_pvalue',
             '0.7671',
             '0.2000',
             'U',
             'Test,',
             'Repetitions:',
             '2',
             'vs',
             '3.',
             'WARNING:',
             'Results',
             'unreliable!',
             '9+',
             'repetitions',
             'recommended.'],
        ]
        output_lines_with_header = print_difference_report(
            self.json_diff_report, include_aggregates_only=True, utest=True, utest_alpha=0.05, use_color=False)
        # The first two lines are the column header and the separator rule.
        output_lines = output_lines_with_header[2:]
        print("\n")
        print("\n".join(output_lines_with_header))
        self.assertEqual(len(output_lines), len(expect_lines))
        for i in range(0, len(output_lines)):
            parts = [x for x in output_lines[i].split(' ') if x]
            self.assertEqual(expect_lines[i], parts)

    def test_json_diff_report(self):
        expected_output = [
            {
                'name': u'BM_One',
                'measurements': [
                    {'time': -0.1,
                     'cpu': 0.1,
                     'real_time': 10,
                     'real_time_other': 9,
                     'cpu_time': 100,
                     'cpu_time_other': 110}
                ],
                'time_unit': 'ns',
                'utest': {}
            },
            {
                'name': u'BM_Two',
                'measurements': [
                    {'time': 0.1111111111111111,
                     'cpu': -0.011111111111111112,
                     'real_time': 9,
                     'real_time_other': 10,
                     'cpu_time': 90,
                     'cpu_time_other': 89},
                    {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
                     'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
                }
            },
            {
                'name': u'short',
                'measurements': [
                    {'time': -0.125,
                     'cpu': -0.0625,
                     'real_time': 8,
                     'real_time_other': 7,
                     'cpu_time': 80,
                     'cpu_time_other': 75},
                    {'time': -0.4325,
                     'cpu': -0.13506493506493514,
                     'real_time': 8,
                     'real_time_other': 4.54,
                     'cpu_time': 77,
                     'cpu_time_other': 66.6}
                ],
                'time_unit': 'ns',
                'utest': {
                    'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
                }
            },
            {
                'name': u'medium',
                'measurements': [
                    {'time': -0.375,
                     'cpu': -0.3375,
                     'real_time': 8,
                     'real_time_other': 5,
                     'cpu_time': 80,
                     'cpu_time_other': 53}
                ],
                'time_unit': 'ns',
                'utest': {}
            }
        ]
        self.assertEqual(len(self.json_diff_report), len(expected_output))
        for out, expected in zip(
                self.json_diff_report, expected_output):
            self.assertEqual(out['name'], expected['name'])
            self.assertEqual(out['time_unit'], expected['time_unit'])
            # assert_utest / assert_measurements are shared helpers defined
            # elsewhere in this module.
            assert_utest(self, out, expected)
            assert_measurements(self, out, expected)
  693. class TestReportDifferenceWithUTestWhileDisplayingAggregatesOnly(
  694. unittest.TestCase):
  695. @classmethod
  696. def setUpClass(cls):
  697. def load_results():
  698. import json
  699. testInputs = os.path.join(
  700. os.path.dirname(
  701. os.path.realpath(__file__)),
  702. 'Inputs')
  703. testOutput1 = os.path.join(testInputs, 'test3_run0.json')
  704. testOutput2 = os.path.join(testInputs, 'test3_run1.json')
  705. with open(testOutput1, 'r') as f:
  706. json1 = json.load(f)
  707. with open(testOutput2, 'r') as f:
  708. json2 = json.load(f)
  709. return json1, json2
  710. json1, json2 = load_results()
  711. cls.json_diff_report = get_difference_report(
  712. json1, json2, utest=True)
  713. def test_json_diff_report_pretty_printing(self):
  714. expect_lines = [
  715. ['BM_One', '-0.1000', '+0.1000', '10', '9', '100', '110'],
  716. ['BM_Two', '+0.1111', '-0.0111', '9', '10', '90', '89'],
  717. ['BM_Two', '-0.1250', '-0.1628', '8', '7', '86', '72'],
  718. ['BM_Two_pvalue',
  719. '1.0000',
  720. '0.6667',
  721. 'U',
  722. 'Test,',
  723. 'Repetitions:',
  724. '2',
  725. 'vs',
  726. '2.',
  727. 'WARNING:',
  728. 'Results',
  729. 'unreliable!',
  730. '9+',
  731. 'repetitions',
  732. 'recommended.'],
  733. ['short', '-0.1250', '-0.0625', '8', '7', '80', '75'],
  734. ['short', '-0.4325', '-0.1351', '8', '5', '77', '67'],
  735. ['short_pvalue',
  736. '0.7671',
  737. '0.2000',
  738. 'U',
  739. 'Test,',
  740. 'Repetitions:',
  741. '2',
  742. 'vs',
  743. '3.',
  744. 'WARNING:',
  745. 'Results',
  746. 'unreliable!',
  747. '9+',
  748. 'repetitions',
  749. 'recommended.'],
  750. ['medium', '-0.3750', '-0.3375', '8', '5', '80', '53']
  751. ]
  752. output_lines_with_header = print_difference_report(
  753. self.json_diff_report,
  754. utest=True, utest_alpha=0.05, use_color=False)
  755. output_lines = output_lines_with_header[2:]
  756. print("\n")
  757. print("\n".join(output_lines_with_header))
  758. self.assertEqual(len(output_lines), len(expect_lines))
  759. for i in range(0, len(output_lines)):
  760. parts = [x for x in output_lines[i].split(' ') if x]
  761. self.assertEqual(expect_lines[i], parts)
  762. def test_json_diff_report(self):
  763. expected_output = [
  764. {
  765. 'name': u'BM_One',
  766. 'measurements': [
  767. {'time': -0.1,
  768. 'cpu': 0.1,
  769. 'real_time': 10,
  770. 'real_time_other': 9,
  771. 'cpu_time': 100,
  772. 'cpu_time_other': 110}
  773. ],
  774. 'time_unit': 'ns',
  775. 'utest': {}
  776. },
  777. {
  778. 'name': u'BM_Two',
  779. 'measurements': [
  780. {'time': 0.1111111111111111,
  781. 'cpu': -0.011111111111111112,
  782. 'real_time': 9,
  783. 'real_time_other': 10,
  784. 'cpu_time': 90,
  785. 'cpu_time_other': 89},
  786. {'time': -0.125, 'cpu': -0.16279069767441862, 'real_time': 8,
  787. 'real_time_other': 7, 'cpu_time': 86, 'cpu_time_other': 72}
  788. ],
  789. 'time_unit': 'ns',
  790. 'utest': {
  791. 'have_optimal_repetitions': False, 'cpu_pvalue': 0.6666666666666666, 'time_pvalue': 1.0
  792. }
  793. },
  794. {
  795. 'name': u'short',
  796. 'measurements': [
  797. {'time': -0.125,
  798. 'cpu': -0.0625,
  799. 'real_time': 8,
  800. 'real_time_other': 7,
  801. 'cpu_time': 80,
  802. 'cpu_time_other': 75},
  803. {'time': -0.4325,
  804. 'cpu': -0.13506493506493514,
  805. 'real_time': 8,
  806. 'real_time_other': 4.54,
  807. 'cpu_time': 77,
  808. 'cpu_time_other': 66.6}
  809. ],
  810. 'time_unit': 'ns',
  811. 'utest': {
  812. 'have_optimal_repetitions': False, 'cpu_pvalue': 0.2, 'time_pvalue': 0.7670968684102772
  813. }
  814. },
  815. {
  816. 'name': u'medium',
  817. 'measurements': [
  818. {'real_time_other': 5,
  819. 'cpu_time': 80,
  820. 'time': -0.375,
  821. 'real_time': 8,
  822. 'cpu_time_other': 53,
  823. 'cpu': -0.3375
  824. }
  825. ],
  826. 'utest': {},
  827. 'time_unit': u'ns',
  828. 'aggregate_name': ''
  829. }
  830. ]
  831. self.assertEqual(len(self.json_diff_report), len(expected_output))
  832. for out, expected in zip(
  833. self.json_diff_report, expected_output):
  834. self.assertEqual(out['name'], expected['name'])
  835. self.assertEqual(out['time_unit'], expected['time_unit'])
  836. assert_utest(self, out, expected)
  837. assert_measurements(self, out, expected)
  838. class TestReportDifferenceForPercentageAggregates(
  839. unittest.TestCase):
  840. @classmethod
  841. def setUpClass(cls):
  842. def load_results():
  843. import json
  844. testInputs = os.path.join(
  845. os.path.dirname(
  846. os.path.realpath(__file__)),
  847. 'Inputs')
  848. testOutput1 = os.path.join(testInputs, 'test4_run0.json')
  849. testOutput2 = os.path.join(testInputs, 'test4_run1.json')
  850. with open(testOutput1, 'r') as f:
  851. json1 = json.load(f)
  852. with open(testOutput2, 'r') as f:
  853. json2 = json.load(f)
  854. return json1, json2
  855. json1, json2 = load_results()
  856. cls.json_diff_report = get_difference_report(
  857. json1, json2, utest=True)
  858. def test_json_diff_report_pretty_printing(self):
  859. expect_lines = [
  860. ['whocares', '-0.5000', '+0.5000', '0', '0', '0', '0']
  861. ]
  862. output_lines_with_header = print_difference_report(
  863. self.json_diff_report,
  864. utest=True, utest_alpha=0.05, use_color=False)
  865. output_lines = output_lines_with_header[2:]
  866. print("\n")
  867. print("\n".join(output_lines_with_header))
  868. self.assertEqual(len(output_lines), len(expect_lines))
  869. for i in range(0, len(output_lines)):
  870. parts = [x for x in output_lines[i].split(' ') if x]
  871. self.assertEqual(expect_lines[i], parts)
  872. def test_json_diff_report(self):
  873. expected_output = [
  874. {
  875. 'name': u'whocares',
  876. 'measurements': [
  877. {'time': -0.5,
  878. 'cpu': 0.5,
  879. 'real_time': 0.01,
  880. 'real_time_other': 0.005,
  881. 'cpu_time': 0.10,
  882. 'cpu_time_other': 0.15}
  883. ],
  884. 'time_unit': 'ns',
  885. 'utest': {}
  886. }
  887. ]
  888. self.assertEqual(len(self.json_diff_report), len(expected_output))
  889. for out, expected in zip(
  890. self.json_diff_report, expected_output):
  891. self.assertEqual(out['name'], expected['name'])
  892. self.assertEqual(out['time_unit'], expected['time_unit'])
  893. assert_utest(self, out, expected)
  894. assert_measurements(self, out, expected)
  895. class TestReportSorting(unittest.TestCase):
  896. @classmethod
  897. def setUpClass(cls):
  898. def load_result():
  899. import json
  900. testInputs = os.path.join(
  901. os.path.dirname(
  902. os.path.realpath(__file__)),
  903. 'Inputs')
  904. testOutput = os.path.join(testInputs, 'test4_run.json')
  905. with open(testOutput, 'r') as f:
  906. json = json.load(f)
  907. return json
  908. cls.json = load_result()
  909. def test_json_diff_report_pretty_printing(self):
  910. import util
  911. expected_names = [
  912. "99 family 0 instance 0 repetition 0",
  913. "98 family 0 instance 0 repetition 1",
  914. "97 family 0 instance 0 aggregate",
  915. "96 family 0 instance 1 repetition 0",
  916. "95 family 0 instance 1 repetition 1",
  917. "94 family 0 instance 1 aggregate",
  918. "93 family 1 instance 0 repetition 0",
  919. "92 family 1 instance 0 repetition 1",
  920. "91 family 1 instance 0 aggregate",
  921. "90 family 1 instance 1 repetition 0",
  922. "89 family 1 instance 1 repetition 1",
  923. "88 family 1 instance 1 aggregate"
  924. ]
  925. for n in range(len(self.json['benchmarks']) ** 2):
  926. random.shuffle(self.json['benchmarks'])
  927. sorted_benchmarks = util.sort_benchmark_results(self.json)[
  928. 'benchmarks']
  929. self.assertEqual(len(expected_names), len(sorted_benchmarks))
  930. for out, expected in zip(sorted_benchmarks, expected_names):
  931. self.assertEqual(out['name'], expected)
  932. def assert_utest(unittest_instance, lhs, rhs):
  933. if lhs['utest']:
  934. unittest_instance.assertAlmostEqual(
  935. lhs['utest']['cpu_pvalue'],
  936. rhs['utest']['cpu_pvalue'])
  937. unittest_instance.assertAlmostEqual(
  938. lhs['utest']['time_pvalue'],
  939. rhs['utest']['time_pvalue'])
  940. unittest_instance.assertEqual(
  941. lhs['utest']['have_optimal_repetitions'],
  942. rhs['utest']['have_optimal_repetitions'])
  943. else:
  944. # lhs is empty. assert if rhs is not.
  945. unittest_instance.assertEqual(lhs['utest'], rhs['utest'])
  946. def assert_measurements(unittest_instance, lhs, rhs):
  947. for m1, m2 in zip(lhs['measurements'], rhs['measurements']):
  948. unittest_instance.assertEqual(m1['real_time'], m2['real_time'])
  949. unittest_instance.assertEqual(m1['cpu_time'], m2['cpu_time'])
  950. # m1['time'] and m1['cpu'] hold values which are being calculated,
  951. # and therefore we must use almost-equal pattern.
  952. unittest_instance.assertAlmostEqual(m1['time'], m2['time'], places=4)
  953. unittest_instance.assertAlmostEqual(m1['cpu'], m2['cpu'], places=4)
# Run the full test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
  956. # vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
  957. # kate: tab-width: 4; replace-tabs on; indent-width 4; tab-indents: off;
  958. # kate: indent-mode python; remove-trailing-spaces modified;