zephyr/scripts/set_assignees.py
Fabio Baltieri 16d723ee1b scripts: set_assignee: don't skip assignment on too many labels
The set assignee script currently exits if there are too many labels
associated with a PR. Change that to just skip the label assignment, but
continue on to assign a maintainer based on the most relevant area.

Tested with:

  ./scripts/set_assignees.py -v --dry-run -P 52716

Signed-off-by: Fabio Baltieri <fabiobaltieri@google.com>
2023-01-26 14:57:29 -05:00

237 lines
7.7 KiB
Python
Executable file

#!/usr/bin/env python3
# Copyright (c) 2022 Intel Corp.
# SPDX-License-Identifier: Apache-2.0
import argparse
import sys
import os
import time
import datetime
from github import Github, GithubException
from github.GithubException import UnknownObjectException
from collections import defaultdict
# Directory that contains this script.
# NOTE(review): despite the name, this is the script's own directory, not
# necessarily the top of the repository - confirm the intent.
TOP_DIR = os.path.join(os.path.dirname(__file__))
# Put "<script dir>/scripts" first on the module search path before importing
# get_maintainer below.
# NOTE(review): presumably the import really resolves through the script's own
# directory, which Python adds to sys.path by itself - verify.
sys.path.insert(0, os.path.join(TOP_DIR, "scripts"))
from get_maintainer import Maintainers
def log(s):
    """Print *s* on standard output when verbose mode is enabled.

    The verbosity is read from the module-level ``args`` namespace, so this
    helper can only be used once ``parse_args()`` has populated it.
    """
    if args.verbose <= 0:
        # Quiet mode: drop the message.
        return
    print(s)
def parse_args():
    """Parse the command line and store the result in the global ``args``.

    Nothing is returned; the other functions of this script read the parsed
    options from the module-level ``args`` namespace.
    """
    global args

    # One entry per option: (flags, keyword arguments for add_argument()).
    # The order is kept because it determines the order of the --help output.
    option_table = (
        (("-M", "--maintainer-file"),
         {"default": "MAINTAINERS.yml", "help": "Maintainer file to be used."}),
        (("-P", "--pull_request"),
         {"type": int, "help": "Operate on one pull-request only."}),
        (("-s", "--since"),
         {"help": "Process pull-requests since date."}),
        (("-y", "--dry-run"),
         {"action": "store_true", "help": "Dry run only."}),
        (("-o", "--org"),
         {"default": "zephyrproject-rtos", "help": "Github organisation"}),
        (("-r", "--repo"),
         {"default": "zephyr", "help": "Github repository"}),
        (("-v", "--verbose"),
         {"action": "count", "default": 0, "help": "Verbose Output"}),
    )

    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
        allow_abbrev=False,
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    args = cli.parse_args()
def process_pr(gh, maintainer_file, number):
    """Label one pull request, request reviewers and pick an assignee.

    The files changed by the pull request are matched against the areas of
    the maintainers file. The matches determine the labels to add, the
    collaborators to ask for a review and the maintainer to assign. Nothing
    is changed on the pull request when ``args.dry_run`` is set; the
    decisions are only logged.

    Args:
        gh: Github client, used to look up the repository and users.
        maintainer_file: object offering ``path2areas(path)`` and an
            ``areas`` mapping keyed by area name.
        number: number of the pull request to process.

    Returns:
        None. Pull requests changing more than 500 files are skipped.
    """
    gh_repo = gh.get_repo(f"{args.org}/{args.repo}")
    pr = gh_repo.get_pull(number)

    log(f"working on https://github.com/{args.org}/{args.repo}/pull/{pr.number} : {pr.title}")

    labels = set()
    area_counter = defaultdict(int)   # area name -> number of matching files
    maint = defaultdict(int)          # maintainer -> number of matching files
    all_areas = set()

    # Fetch the file list once and reuse it for both the size check and the
    # area matching, instead of paging through the API a second time.
    changed_files = list(pr.get_files())
    num_files = len(changed_files)
    if num_files > 500:
        log(f"Too many files changed ({num_files}), skipping....")
        return

    for f in changed_files:
        log(f"file: {f.filename}")
        areas = maintainer_file.path2areas(f.filename)
        if not areas:
            continue
        all_areas.update(areas)
        for a in areas:
            area_counter[a.name] += 1
            labels.update(a.labels)
            for p in a.maintainers:
                maint[p] += 1

    # Areas ordered by number of matching files, best match first.
    ac = dict(sorted(area_counter.items(), key=lambda item: item[1], reverse=True))
    log(f"Area matches: {ac}")
    log(f"labels: {labels}")

    # Create a list of collaborators ordered by the area match
    collab = []
    for a in ac:
        collab += maintainer_file.areas[a].maintainers
        collab += maintainer_file.areas[a].collaborators
    # dict.fromkeys() removes duplicates while keeping the first-seen order.
    collab = list(dict.fromkeys(collab))
    log(f"collab: {collab}")

    # Maintainers ordered by number of matching files, best match first.
    sm = dict(sorted(maint.items(), key=lambda item: item[1], reverse=True))

    log(f"Submitted by: {pr.user.login}")
    log(f"candidate maintainers: {sm}")

    # The string "None" (not the None object) marks "no maintainer picked".
    maintainer = "None"
    maintainers = list(sm.keys())

    prop = 0
    if maintainers:
        maintainer = maintainers[0]

        area_counts = list(ac.values())
        # Tie between the two best matching areas: apply the tie-break rules.
        if len(area_counts) > 1 and area_counts[0] == area_counts[1]:
            for aa in ac:
                if 'Documentation' in aa:
                    log("++ With multiple areas of same weight including docs, take something else other than Documentation as the maintainer")
                    for a in all_areas:
                        if (a.name == aa and
                                a.maintainers and a.maintainers[0] == maintainer and
                                len(maintainers) > 1):
                            maintainer = maintainers[1]
                elif 'Platform' in aa:
                    log("++ Platform takes precedence over subsystem...")
                    log(f"Set maintainer of area {aa}")
                    for a in all_areas:
                        if a.name == aa:
                            if a.maintainers:
                                maintainer = a.maintainers[0]
                                break

        # if the submitter is the same as the maintainer, check if we have
        # multiple maintainers
        if pr.user.login == maintainer:
            log("Submitter is same as Assignee, trying to find another assignee...")
            # ac is not empty here: maintainers are only counted for matched areas.
            aff = next(iter(ac))
            for a in all_areas:
                if a.name == aff:
                    if len(a.maintainers) > 1:
                        maintainer = a.maintainers[1]
                    else:
                        log(f"This area has only one maintainer, keeping assignee as {maintainer}")

        # Share of the changed files owned by the picked maintainer; below
        # 20% nobody is assigned.
        prop = (maint[maintainer] / num_files) * 100
        if prop < 20:
            maintainer = "None"

    log(f"Picked maintainer: {maintainer} ({prop:.2f}% ownership)")
    log("+++++++++++++++++++++++++")

    # Set labels
    if labels:
        if len(labels) < 10:
            for label in labels:
                log(f"adding label {label}...")
                if not args.dry_run:
                    pr.add_to_labels(label)
        else:
            # Only the labelling is skipped; reviewers and assignee still follow.
            log("Too many labels to be applied")

    if collab:
        reviewers = []
        existing_reviewers = set()

        for review in pr.get_reviews():
            existing_reviewers.add(review.user)

        # get_review_requests() yields one paginated list per requestee kind
        # (users, teams). Walk each list completely instead of reading a
        # single, differently numbered page from each of them.
        for requested in pr.get_review_requests():
            existing_reviewers.update(requested)

        for c in collab:
            try:
                u = gh.get_user(c)
                if pr.user != u and gh_repo.has_in_collaborators(u):
                    if u not in existing_reviewers:
                        reviewers.append(c)
            except UnknownObjectException as e:
                log(f"Can't get user '{c}', account does not exist anymore? ({e})")

        if len(existing_reviewers) < 15:
            # Never push the total number of reviewers above 15.
            reviewer_vacancy = 15 - len(existing_reviewers)
            reviewers = reviewers[:reviewer_vacancy]

            if reviewers:
                try:
                    log(f"adding reviewers {reviewers}...")
                    if not args.dry_run:
                        pr.create_review_request(reviewers=reviewers)
                except GithubException:
                    log("cant add reviewer")
        else:
            log("not adding reviewers because the existing reviewer count is greater than or "
                "equal to 15")

    # assignees
    if maintainer != 'None' and not pr.assignee:
        try:
            assignee = gh.get_user(maintainer)
        except GithubException:
            log("Error: Unknown user")
        else:
            log(f"Adding assignee {assignee}...")
            if not args.dry_run:
                pr.add_to_assignees(assignee)
    else:
        log("not setting assignee")

    # Pause before the caller moves on to the next pull request
    # (presumably to go easy on the GitHub API - confirm).
    time.sleep(1)
def main():
    """Script entry point.

    Parses the command line, requires a token in the ``GITHUB_TOKEN``
    environment variable and then processes either the single pull request
    given with ``-P`` or every pull request returned by a search for open,
    non-draft, unassigned pull requests against ``main`` created after the
    ``--since`` date (yesterday when not given).
    """
    parse_args()

    token = os.environ.get('GITHUB_TOKEN')
    if not token:
        sys.exit('Github token not set in environment, please set the '
                 'GITHUB_TOKEN environment variable and retry.')

    gh = Github(token)
    maintainer_file = Maintainers(args.maintainer_file)

    # Single pull request mode.
    if args.pull_request:
        process_pr(gh, maintainer_file, args.pull_request)
        return

    # Batch mode: fall back to "since yesterday" when no date was given.
    since = args.since or datetime.date.today() - datetime.timedelta(days=1)
    query = f'repo:{args.org}/{args.repo} is:open is:pr base:main -is:draft no:assignee created:>{since}'
    for issue in gh.search_issues(query=query):
        process_pr(gh, maintainer_file, issue.number)
# Run main() only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()