mkowners.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. #!/usr/bin/env python3
  2. # Copyright 2017 gRPC authors.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. import argparse
  16. import collections
  17. import operator
  18. import os
  19. import re
  20. import subprocess
  21. #
  22. # Find the root of the git tree
  23. #
  24. git_root = (subprocess.check_output(['git', 'rev-parse', '--show-toplevel'
  25. ]).decode('utf-8').strip())
  26. #
  27. # Parse command line arguments
  28. #
  29. default_out = os.path.join(git_root, '.github', 'CODEOWNERS')
  30. argp = argparse.ArgumentParser('Generate .github/CODEOWNERS file')
  31. argp.add_argument('--out',
  32. '-o',
  33. type=str,
  34. default=default_out,
  35. help='Output file (default %s)' % default_out)
  36. args = argp.parse_args()
  37. #
  38. # Walk git tree to locate all OWNERS files
  39. #
  40. owners_files = [
  41. os.path.join(root, 'OWNERS')
  42. for root, dirs, files in os.walk(git_root)
  43. if 'OWNERS' in files
  44. ]
  45. #
  46. # Parse owners files
  47. #
  48. Owners = collections.namedtuple('Owners', 'parent directives dir')
  49. Directive = collections.namedtuple('Directive', 'who globs')
  50. def parse_owners(filename):
  51. with open(filename) as f:
  52. src = f.read().splitlines()
  53. parent = True
  54. directives = []
  55. for line in src:
  56. line = line.strip()
  57. # line := directive | comment
  58. if not line:
  59. continue
  60. if line[0] == '#':
  61. continue
  62. # it's a directive
  63. directive = None
  64. if line == 'set noparent':
  65. parent = False
  66. elif line == '*':
  67. directive = Directive(who='*', globs=[])
  68. elif ' ' in line:
  69. (who, globs) = line.split(' ', 1)
  70. globs_list = [glob for glob in globs.split(' ') if glob]
  71. directive = Directive(who=who, globs=globs_list)
  72. else:
  73. directive = Directive(who=line, globs=[])
  74. if directive:
  75. directives.append(directive)
  76. return Owners(parent=parent,
  77. directives=directives,
  78. dir=os.path.relpath(os.path.dirname(filename), git_root))
  79. owners_data = sorted([parse_owners(filename) for filename in owners_files],
  80. key=operator.attrgetter('dir'))
  81. #
  82. # Modify owners so that parented OWNERS files point to the actual
  83. # Owners tuple with their parent field
  84. #
  85. new_owners_data = []
  86. for owners in owners_data:
  87. if owners.parent == True:
  88. best_parent = None
  89. best_parent_score = None
  90. for possible_parent in owners_data:
  91. if possible_parent is owners:
  92. continue
  93. rel = os.path.relpath(owners.dir, possible_parent.dir)
  94. # '..' ==> we had to walk up from possible_parent to get to owners
  95. # ==> not a parent
  96. if '..' in rel:
  97. continue
  98. depth = len(rel.split(os.sep))
  99. if not best_parent or depth < best_parent_score:
  100. best_parent = possible_parent
  101. best_parent_score = depth
  102. if best_parent:
  103. owners = owners._replace(parent=best_parent.dir)
  104. else:
  105. owners = owners._replace(parent=None)
  106. new_owners_data.append(owners)
  107. owners_data = new_owners_data
  108. #
  109. # In bottom to top order, process owners data structures to build up
  110. # a CODEOWNERS file for GitHub
  111. #
  112. def full_dir(rules_dir, sub_path):
  113. return os.path.join(rules_dir, sub_path) if rules_dir != '.' else sub_path
  114. # glob using git
  115. gg_cache = {}
  116. def git_glob(glob):
  117. global gg_cache
  118. if glob in gg_cache:
  119. return gg_cache[glob]
  120. r = set(
  121. subprocess.check_output([
  122. 'git', 'ls-files', os.path.join(git_root, glob)
  123. ]).decode('utf-8').strip().splitlines())
  124. gg_cache[glob] = r
  125. return r
  126. def expand_directives(root, directives):
  127. globs = collections.OrderedDict()
  128. # build a table of glob --> owners
  129. for directive in directives:
  130. for glob in directive.globs or ['**']:
  131. if glob not in globs:
  132. globs[glob] = []
  133. if directive.who not in globs[glob]:
  134. globs[glob].append(directive.who)
  135. # expand owners for intersecting globs
  136. sorted_globs = sorted(list(globs.keys()),
  137. key=lambda g: len(git_glob(full_dir(root, g))),
  138. reverse=True)
  139. out_globs = collections.OrderedDict()
  140. for glob_add in sorted_globs:
  141. who_add = globs[glob_add]
  142. pre_items = [i for i in list(out_globs.items())]
  143. out_globs[glob_add] = who_add.copy()
  144. for glob_have, who_have in pre_items:
  145. files_add = git_glob(full_dir(root, glob_add))
  146. files_have = git_glob(full_dir(root, glob_have))
  147. intersect = files_have.intersection(files_add)
  148. if intersect:
  149. for f in sorted(files_add): # sorted to ensure merge stability
  150. if f not in intersect:
  151. out_globs[os.path.relpath(f, start=root)] = who_add
  152. for who in who_have:
  153. if who not in out_globs[glob_add]:
  154. out_globs[glob_add].append(who)
  155. return out_globs
  156. def add_parent_to_globs(parent, globs, globs_dir):
  157. if not parent:
  158. return
  159. for owners in owners_data:
  160. if owners.dir == parent:
  161. owners_globs = expand_directives(owners.dir, owners.directives)
  162. for oglob, oglob_who in list(owners_globs.items()):
  163. for gglob, gglob_who in list(globs.items()):
  164. files_parent = git_glob(full_dir(owners.dir, oglob))
  165. files_child = git_glob(full_dir(globs_dir, gglob))
  166. intersect = files_parent.intersection(files_child)
  167. gglob_who_orig = gglob_who.copy()
  168. if intersect:
  169. for f in sorted(files_child
  170. ): # sorted to ensure merge stability
  171. if f not in intersect:
  172. who = gglob_who_orig.copy()
  173. globs[os.path.relpath(f, start=globs_dir)] = who
  174. for who in oglob_who:
  175. if who not in gglob_who:
  176. gglob_who.append(who)
  177. add_parent_to_globs(owners.parent, globs, globs_dir)
  178. return
  179. assert (False)
  180. todo = owners_data.copy()
  181. done = set()
  182. with open(args.out, 'w') as out:
  183. out.write('# Auto-generated by the tools/mkowners/mkowners.py tool\n')
  184. out.write('# Uses OWNERS files in different modules throughout the\n')
  185. out.write('# repository as the source of truth for module ownership.\n')
  186. written_globs = []
  187. while todo:
  188. head, *todo = todo
  189. if head.parent and not head.parent in done:
  190. todo.append(head)
  191. continue
  192. globs = expand_directives(head.dir, head.directives)
  193. add_parent_to_globs(head.parent, globs, head.dir)
  194. for glob, owners in list(globs.items()):
  195. skip = False
  196. for glob1, owners1, dir1 in reversed(written_globs):
  197. files = git_glob(full_dir(head.dir, glob))
  198. files1 = git_glob(full_dir(dir1, glob1))
  199. intersect = files.intersection(files1)
  200. if files == intersect:
  201. if sorted(owners) == sorted(owners1):
  202. skip = True # nothing new in this rule
  203. break
  204. elif intersect:
  205. # continuing would cause a semantic change since some files are
  206. # affected differently by this rule and CODEOWNERS is order dependent
  207. break
  208. if not skip:
  209. out.write('/%s %s\n' %
  210. (full_dir(head.dir, glob), ' '.join(owners)))
  211. written_globs.append((glob, owners, head.dir))
  212. done.add(head.dir)