  1. #!/usr/bin/env python3
  2. #
  3. # Script to find data size at the function level. Basically just a bit wrapper
  4. # around nm with some extra conveniences for comparing builds. Heavily inspired
  5. # by Linux's Bloat-O-Meter.
  6. #
import collections as co
import csv
import glob
import itertools as it
import os
import re
import shlex
import subprocess as sp
import sys
# Default glob patterns used to locate object files when none are given
OBJ_PATHS = ['*.o']
  16. def collect(paths, **args):
  17. results = co.defaultdict(lambda: 0)
  18. pattern = re.compile(
  19. '^(?P<size>[0-9a-fA-F]+)' +
  20. ' (?P<type>[%s])' % re.escape(args['type']) +
  21. ' (?P<func>.+?)$')
  22. for path in paths:
  23. # note nm-tool may contain extra args
  24. cmd = args['nm_tool'] + ['--size-sort', path]
  25. if args.get('verbose'):
  26. print(' '.join(shlex.quote(c) for c in cmd))
  27. proc = sp.Popen(cmd,
  28. stdout=sp.PIPE,
  29. stderr=sp.PIPE if not args.get('verbose') else None,
  30. universal_newlines=True,
  31. errors='replace')
  32. for line in proc.stdout:
  33. m = pattern.match(line)
  34. if m:
  35. results[(path, m.group('func'))] += int(m.group('size'), 16)
  36. proc.wait()
  37. if proc.returncode != 0:
  38. if not args.get('verbose'):
  39. for line in proc.stderr:
  40. sys.stdout.write(line)
  41. sys.exit(-1)
  42. flat_results = []
  43. for (file, func), size in results.items():
  44. # map to source files
  45. if args.get('build_dir'):
  46. file = re.sub('%s/*' % re.escape(args['build_dir']), '', file)
  47. # replace .o with .c, different scripts report .o/.c, we need to
  48. # choose one if we want to deduplicate csv files
  49. file = re.sub('\.o$', '.c', file)
  50. # discard internal functions
  51. if not args.get('everything'):
  52. if func.startswith('__'):
  53. continue
  54. # discard .8449 suffixes created by optimizer
  55. func = re.sub('\.[0-9]+', '', func)
  56. flat_results.append((file, func, size))
  57. return flat_results
  58. def main(**args):
  59. def openio(path, mode='r'):
  60. if path == '-':
  61. if 'r' in mode:
  62. return os.fdopen(os.dup(sys.stdin.fileno()), 'r')
  63. else:
  64. return os.fdopen(os.dup(sys.stdout.fileno()), 'w')
  65. else:
  66. return open(path, mode)
  67. # find sizes
  68. if not args.get('use', None):
  69. # find .o files
  70. paths = []
  71. for path in args['obj_paths']:
  72. if os.path.isdir(path):
  73. path = path + '/*.o'
  74. for path in glob.glob(path):
  75. paths.append(path)
  76. if not paths:
  77. print('no .obj files found in %r?' % args['obj_paths'])
  78. sys.exit(-1)
  79. results = collect(paths, **args)
  80. else:
  81. with openio(args['use']) as f:
  82. r = csv.DictReader(f)
  83. results = [
  84. ( result['file'],
  85. result['name'],
  86. int(result['data_size']))
  87. for result in r
  88. if result.get('data_size') not in {None, ''}]
  89. total = 0
  90. for _, _, size in results:
  91. total += size
  92. # find previous results?
  93. if args.get('diff'):
  94. try:
  95. with openio(args['diff']) as f:
  96. r = csv.DictReader(f)
  97. prev_results = [
  98. ( result['file'],
  99. result['name'],
  100. int(result['data_size']))
  101. for result in r
  102. if result.get('data_size') not in {None, ''}]
  103. except FileNotFoundError:
  104. prev_results = []
  105. prev_total = 0
  106. for _, _, size in prev_results:
  107. prev_total += size
  108. # write results to CSV
  109. if args.get('output'):
  110. merged_results = co.defaultdict(lambda: {})
  111. other_fields = []
  112. # merge?
  113. if args.get('merge'):
  114. try:
  115. with openio(args['merge']) as f:
  116. r = csv.DictReader(f)
  117. for result in r:
  118. file = result.pop('file', '')
  119. func = result.pop('name', '')
  120. result.pop('data_size', None)
  121. merged_results[(file, func)] = result
  122. other_fields = result.keys()
  123. except FileNotFoundError:
  124. pass
  125. for file, func, size in results:
  126. merged_results[(file, func)]['data_size'] = size
  127. with openio(args['output'], 'w') as f:
  128. w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size'])
  129. w.writeheader()
  130. for (file, func), result in sorted(merged_results.items()):
  131. w.writerow({'file': file, 'name': func, **result})
  132. # print results
  133. def dedup_entries(results, by='name'):
  134. entries = co.defaultdict(lambda: 0)
  135. for file, func, size in results:
  136. entry = (file if by == 'file' else func)
  137. entries[entry] += size
  138. return entries
  139. def diff_entries(olds, news):
  140. diff = co.defaultdict(lambda: (0, 0, 0, 0))
  141. for name, new in news.items():
  142. diff[name] = (0, new, new, 1.0)
  143. for name, old in olds.items():
  144. _, new, _, _ = diff[name]
  145. diff[name] = (old, new, new-old, (new-old)/old if old else 1.0)
  146. return diff
  147. def sorted_entries(entries):
  148. if args.get('size_sort'):
  149. return sorted(entries, key=lambda x: (-x[1], x))
  150. elif args.get('reverse_size_sort'):
  151. return sorted(entries, key=lambda x: (+x[1], x))
  152. else:
  153. return sorted(entries)
  154. def sorted_diff_entries(entries):
  155. if args.get('size_sort'):
  156. return sorted(entries, key=lambda x: (-x[1][1], x))
  157. elif args.get('reverse_size_sort'):
  158. return sorted(entries, key=lambda x: (+x[1][1], x))
  159. else:
  160. return sorted(entries, key=lambda x: (-x[1][3], x))
  161. def print_header(by=''):
  162. if not args.get('diff'):
  163. print('%-36s %7s' % (by, 'size'))
  164. else:
  165. print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff'))
  166. def print_entry(name, size):
  167. print("%-36s %7d" % (name, size))
  168. def print_diff_entry(name, old, new, diff, ratio):
  169. print("%-36s %7s %7s %+7d%s" % (name,
  170. old or "-",
  171. new or "-",
  172. diff,
  173. ' (%+.1f%%)' % (100*ratio) if ratio else ''))
  174. def print_entries(by='name'):
  175. entries = dedup_entries(results, by=by)
  176. if not args.get('diff'):
  177. print_header(by=by)
  178. for name, size in sorted_entries(entries.items()):
  179. print_entry(name, size)
  180. else:
  181. prev_entries = dedup_entries(prev_results, by=by)
  182. diff = diff_entries(prev_entries, entries)
  183. print_header(by='%s (%d added, %d removed)' % (by,
  184. sum(1 for old, _, _, _ in diff.values() if not old),
  185. sum(1 for _, new, _, _ in diff.values() if not new)))
  186. for name, (old, new, diff, ratio) in sorted_diff_entries(
  187. diff.items()):
  188. if ratio or args.get('all'):
  189. print_diff_entry(name, old, new, diff, ratio)
  190. def print_totals():
  191. if not args.get('diff'):
  192. print_entry('TOTAL', total)
  193. else:
  194. ratio = (0.0 if not prev_total and not total
  195. else 1.0 if not prev_total
  196. else (total-prev_total)/prev_total)
  197. print_diff_entry('TOTAL',
  198. prev_total, total,
  199. total-prev_total,
  200. ratio)
  201. if args.get('quiet'):
  202. pass
  203. elif args.get('summary'):
  204. print_header()
  205. print_totals()
  206. elif args.get('files'):
  207. print_entries(by='file')
  208. print_totals()
  209. else:
  210. print_entries(by='name')
  211. print_totals()
  212. if __name__ == "__main__":
  213. import argparse
  214. import sys
  215. parser = argparse.ArgumentParser(
  216. description="Find data size at the function level.")
  217. parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS,
  218. help="Description of where to find *.o files. May be a directory \
  219. or a list of paths. Defaults to %r." % OBJ_PATHS)
  220. parser.add_argument('-v', '--verbose', action='store_true',
  221. help="Output commands that run behind the scenes.")
  222. parser.add_argument('-q', '--quiet', action='store_true',
  223. help="Don't show anything, useful with -o.")
  224. parser.add_argument('-o', '--output',
  225. help="Specify CSV file to store results.")
  226. parser.add_argument('-u', '--use',
  227. help="Don't compile and find data sizes, instead use this CSV file.")
  228. parser.add_argument('-d', '--diff',
  229. help="Specify CSV file to diff data size against.")
  230. parser.add_argument('-m', '--merge',
  231. help="Merge with an existing CSV file when writing to output.")
  232. parser.add_argument('-a', '--all', action='store_true',
  233. help="Show all functions, not just the ones that changed.")
  234. parser.add_argument('-A', '--everything', action='store_true',
  235. help="Include builtin and libc specific symbols.")
  236. parser.add_argument('-s', '--size-sort', action='store_true',
  237. help="Sort by size.")
  238. parser.add_argument('-S', '--reverse-size-sort', action='store_true',
  239. help="Sort by size, but backwards.")
  240. parser.add_argument('-F', '--files', action='store_true',
  241. help="Show file-level data sizes. Note this does not include padding! "
  242. "So sizes may differ from other tools.")
  243. parser.add_argument('-Y', '--summary', action='store_true',
  244. help="Only show the total data size.")
  245. parser.add_argument('--type', default='dDbB',
  246. help="Type of symbols to report, this uses the same single-character "
  247. "type-names emitted by nm. Defaults to %(default)r.")
  248. parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(),
  249. help="Path to the nm tool to use.")
  250. parser.add_argument('--build-dir',
  251. help="Specify the relative build directory. Used to map object files \
  252. to the correct source files.")
  253. sys.exit(main(**vars(parser.parse_args())))