summary.py

#!/usr/bin/env python3
#
# Script to summarize the outputs of other scripts. Operates on CSV files.
#
import functools as ft
import collections as co
import os
import sys
import csv
import re
import math as m
# displayable fields
Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio')
FIELDS = [
    # name, parse, acc, key, fmt, repr, null, ratio
    Field('code',
        lambda r: int(r['code_size']),
        sum,
        lambda r: r,
        '%7s',
        lambda r: r,
        '-',
        lambda old, new: (new-old)/old),
    Field('data',
        lambda r: int(r['data_size']),
        sum,
        lambda r: r,
        '%7s',
        lambda r: r,
        '-',
        lambda old, new: (new-old)/old),
    Field('stack',
        lambda r: float(r['stack_limit']),
        max,
        lambda r: r,
        '%7s',
        lambda r: '∞' if m.isinf(r) else int(r),
        '-',
        lambda old, new: (new-old)/old),
    Field('structs',
        lambda r: int(r['struct_size']),
        sum,
        lambda r: r,
        '%8s',
        lambda r: r,
        '-',
        lambda old, new: (new-old)/old),
    Field('coverage',
        lambda r: (int(r['coverage_hits']), int(r['coverage_count'])),
        lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs),
        lambda r: r[0]/r[1],
        '%19s',
        lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]),
            '%.1f%%' % (100*r[0]/r[1])),
        '%11s %7s' % ('-', '-'),
        lambda old, new: ((new[0]/new[1]) - (old[0]/old[1])))
]
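# Each Field describes how one measurement column is handled: parse extracts
# the raw value from a CSV row, acc merges duplicate entries (sum for sizes,
# max for stack), key reduces the value for sorting, fmt/repr control display
# width and rendering, null is the placeholder string, and ratio computes the
# relative change shown in diff mode. For example, a row such as
# {'file': 'lfs.c', 'name': 'lfs_mount', 'code_size': '1234'} (hypothetical
# values) contributes int('1234') to the 'code' field, and repeated rows for
# the same (file, name) pair are summed together.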
def main(**args):
    def openio(path, mode='r'):
        if path == '-':
            if 'r' in mode:
                return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
            else:
                return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
        else:
            return open(path, mode)
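    # openio treats '-' as stdin/stdout; duplicating the file descriptor lets
    # the with-statements below close the returned file object without closing
    # the process's real standard streams.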
    # find results
    results = co.defaultdict(lambda: {})
    for path in args.get('csv_paths', '-'):
        try:
            with openio(path) as f:
                r = csv.DictReader(f)
                for result in r:
                    file = result.pop('file', '')
                    name = result.pop('name', '')
                    prev = results[(file, name)]
                    for field in FIELDS:
                        try:
                            r = field.parse(result)
                            if field.name in prev:
                                results[(file, name)][field.name] = field.acc(
                                    [prev[field.name], r])
                            else:
                                results[(file, name)][field.name] = r
                        except (KeyError, ValueError):
                            pass
        except FileNotFoundError:
            pass
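    # results is keyed by (file, name); rows that repeat the same key, whether
    # within one CSV or across several input files, are merged with each
    # field's acc function. Missing or malformed columns are silently skipped.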
    # find fields
    if args.get('all_fields'):
        fields = FIELDS
    elif args.get('fields') is not None:
        fields_dict = {field.name: field for field in FIELDS}
        fields = [fields_dict[f] for f in args['fields']]
    else:
        fields = []
        for field in FIELDS:
            if any(field.name in result for result in results.values()):
                fields.append(field)
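    # By default only fields that actually appear in the parsed results are
    # shown; -e/--all-fields or an explicit -f/--fields list overrides this.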
    # find total for every field
    total = {}
    for result in results.values():
        for field in fields:
            if field.name in result and field.name in total:
                total[field.name] = field.acc(
                    [total[field.name], result[field.name]])
            elif field.name in result:
                total[field.name] = result[field.name]
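    # The TOTAL row reuses the same acc functions, so code/data/structs are
    # summed across entries, stack reports the maximum stack limit, and
    # coverage accumulates (hits, count) pairs.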
    # find previous results?
    if args.get('diff'):
        prev_results = co.defaultdict(lambda: {})
        try:
            with openio(args['diff']) as f:
                r = csv.DictReader(f)
                for result in r:
                    file = result.pop('file', '')
                    name = result.pop('name', '')
                    prev = prev_results[(file, name)]
                    for field in FIELDS:
                        try:
                            r = field.parse(result)
                            if field.name in prev:
                                prev_results[(file, name)][field.name] = field.acc(
                                    [prev[field.name], r])
                            else:
                                prev_results[(file, name)][field.name] = r
                        except (KeyError, ValueError):
                            pass
        except FileNotFoundError:
            pass

        prev_total = {}
        for result in prev_results.values():
            for field in fields:
                if field.name in result and field.name in prev_total:
                    prev_total[field.name] = field.acc(
                        [prev_total[field.name], result[field.name]])
                elif field.name in result:
                    prev_total[field.name] = result[field.name]
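    # With --diff, the comparison CSV is parsed exactly like the inputs above,
    # producing prev_results and prev_total as the "old" side of the diff.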
    # print results
    def dedup_entries(results, by='name'):
        entries = co.defaultdict(lambda: {})
        for (file, func), result in results.items():
            entry = (file if by == 'file' else func)
            prev = entries[entry]
            for field in fields:
                if field.name in result and field.name in prev:
                    entries[entry][field.name] = field.acc(
                        [prev[field.name], result[field.name]])
                elif field.name in result:
                    entries[entry][field.name] = result[field.name]
        return entries
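    # dedup_entries collapses the two-level (file, name) keys down to a single
    # dimension: with by='file' all functions in a file are merged into one
    # row, otherwise rows are grouped by function name across files.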
    def sorted_entries(entries):
        if args.get('sort') is not None:
            field = {field.name: field for field in FIELDS}[args['sort']]
            return sorted(entries, key=lambda x: (
                -(field.key(x[1][field.name])) if field.name in x[1] else -1, x))
        elif args.get('reverse_sort') is not None:
            field = {field.name: field for field in FIELDS}[args['reverse_sort']]
            return sorted(entries, key=lambda x: (
                +(field.key(x[1][field.name])) if field.name in x[1] else -1, x))
        else:
            return sorted(entries)
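    # -s/--sort orders entries by the chosen field, largest first (hence the
    # negated key); -S/--reverse-sort orders smallest first; with neither
    # option, entries are simply sorted by name.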
    def print_header(by=''):
        if not args.get('diff'):
            print('%-36s' % by, end='')
            for field in fields:
                print((' '+field.fmt) % field.name, end='')
            print()
        else:
            print('%-36s' % by, end='')
            for field in fields:
                print((' '+field.fmt) % field.name, end='')
                print(' %-9s' % '', end='')
            print()
    def print_entry(name, result):
        print('%-36s' % name, end='')
        for field in fields:
            r = result.get(field.name)
            if r is not None:
                print((' '+field.fmt) % field.repr(r), end='')
            else:
                print((' '+field.fmt) % '-', end='')
        print()
    def print_diff_entry(name, old, new):
        print('%-36s' % name, end='')
        for field in fields:
            n = new.get(field.name)
            if n is not None:
                print((' '+field.fmt) % field.repr(n), end='')
            else:
                print((' '+field.fmt) % '-', end='')
            o = old.get(field.name)
            ratio = (
                0.0 if m.isinf(o or 0) and m.isinf(n or 0)
                else +float('inf') if m.isinf(n or 0)
                else -float('inf') if m.isinf(o or 0)
                else 0.0 if not o and not n
                else +1.0 if not o
                else -1.0 if not n
                else field.ratio(o, n))
            print(' %-9s' % (
                '' if not ratio
                else '(+∞%)' if ratio > 0 and m.isinf(ratio)
                else '(-∞%)' if ratio < 0 and m.isinf(ratio)
                else '(%+.1f%%)' % (100*ratio)), end='')
        print()
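    # The ratio column handles the edge cases explicitly: infinite stack
    # limits print as (+∞%)/(-∞%), entries present on only one side of the
    # diff count as a ±100% change, and a zero ratio prints as blank.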
    def print_entries(by='name'):
        entries = dedup_entries(results, by=by)
        if not args.get('diff'):
            print_header(by=by)
            for name, result in sorted_entries(entries.items()):
                print_entry(name, result)
        else:
            prev_entries = dedup_entries(prev_results, by=by)
            print_header(by='%s (%d added, %d removed)' % (by,
                sum(1 for name in entries if name not in prev_entries),
                sum(1 for name in prev_entries if name not in entries)))
            for name, result in sorted_entries(entries.items()):
                if args.get('all') or result != prev_entries.get(name, {}):
                    print_diff_entry(name, prev_entries.get(name, {}), result)
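    # In diff mode the header reports how many entries were added or removed,
    # and unchanged entries are hidden unless -a/--all is given.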
    def print_totals():
        if not args.get('diff'):
            print_entry('TOTAL', total)
        else:
            print_diff_entry('TOTAL', prev_total, total)
    if args.get('summary'):
        print_header()
        print_totals()
    elif args.get('files'):
        print_entries(by='file')
        print_totals()
    else:
        print_entries(by='name')
        print_totals()
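# Example invocations (file names are hypothetical):
#
#   ./summary.py lfs.csv                  # summarize by function name
#   ./summary.py lfs.csv -d lfs.prev.csv  # diff against an older CSV
#   ./summary.py -F lfs.csv               # group entries by file
#   ./summary.py -Y lfs.csv               # print only the TOTAL row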
if __name__ == "__main__":
    import argparse
    import sys
    parser = argparse.ArgumentParser(
        description="Summarize measurements")
    parser.add_argument('csv_paths', nargs='*', default='-',
        help="Description of where to find *.csv files. May be a directory \
            or list of paths. *.csv files will be merged to show the total \
            coverage.")
    parser.add_argument('-d', '--diff',
        help="Specify CSV file to diff against.")
    parser.add_argument('-a', '--all', action='store_true',
        help="Show all objects, not just the ones that changed.")
    parser.add_argument('-e', '--all-fields', action='store_true',
        help="Show all fields, even those with no results.")
    parser.add_argument('-f', '--fields', type=lambda x: re.split(r'\s*,\s*', x),
        help="Comma separated list of fields to print, by default all fields \
            that are found in the CSV files are printed.")
    parser.add_argument('-s', '--sort',
        help="Sort by this field.")
    parser.add_argument('-S', '--reverse-sort',
        help="Sort by this field, but backwards.")
    parser.add_argument('-F', '--files', action='store_true',
        help="Show file-level calls.")
    parser.add_argument('-Y', '--summary', action='store_true',
        help="Only show the totals.")
    sys.exit(main(**vars(parser.parse_args())))