ci: merged_pr stats: fix a bug and minor tweaks

We were not setting the review_rule value correctly and defaulted to 'no'
for all PRs, even when they were correctly reviewed by assignees.

Other minor cleanups.

Signed-off-by: Anas Nashif <anas.nashif@intel.com>
Anas Nashif 2024-01-10 21:22:14 -05:00
parent 0a50cb9179
commit 7fedc81666
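
For context, the corrected review_rule decision added in process_pr() below reduces to a small predicate. The sketch that follows is illustrative only and not part of the commit; its arguments stand in for the values the script derives from the PyGithub pull-request object:

    def review_rule(assignee_reviews, merged_by, submitter, assignees, labels):
        # approved by an assignee, or merged by one of the assignees
        if assignee_reviews > 0 or merged_by in assignees:
            return "yes"
        # not applicable: no assignees, the submitter is the only assignee,
        # or the PR carries an exempt label
        if not assignees or assignees == [submitter] or \
                'Trivial' in labels or 'Hotfix' in labels:
            return "na"
        return "no"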

@@ -13,6 +13,7 @@ import argparse
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import bulk
 from datetime import timedelta
+import pprint
 
 date_format = '%Y-%m-%d %H:%M:%S'
@@ -21,8 +22,11 @@ def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
         formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)
-    parser.add_argument('--pull-request', required=True, help='pull request number', type=int)
-    parser.add_argument('--repo', required=True, help='github repo')
+    parser.add_argument('--pull-request', help='pull request number', type=int)
+    parser.add_argument('--range', help='execute based on a date range, for example 2023-01-01..2023-01-05')
+    parser.add_argument('--repo', help='github repo', default='zephyrproject-rtos/zephyr')
+    parser.add_argument('--es-index', help='Elasticsearch index')
+    parser.add_argument('-y','--dry-run', action="store_true", help='dry run, do not upload data')
 
     return parser.parse_args()
@@ -33,6 +37,90 @@ def gendata(data, index):
             "_source": t
         }
 
+def process_pr(pr):
+    reviews = pr.get_reviews()
+    print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
+    assignee_reviews = 0
+
+    prj = {}
+    assignees = []
+    labels = []
+
+    for label in pr.labels:
+        labels.append(label.name)
+
+    reviewers = set()
+    for review in reviews:
+        # get list of all approved reviews
+        if review.user and review.state == 'APPROVED':
+            reviewers.add(review.user.login)
+
+    for assignee in pr.assignees:
+        # list assignees for later checks
+        assignees.append(assignee.login)
+        if assignee.login in reviewers:
+            assignee_reviews += 1
+
+    if assignee_reviews > 0 or pr.merged_by.login in assignees:
+        # in case of assignee reviews or if the PR was merged by an assignee
+        prj['review_rule'] = "yes"
+    elif not pr.assignees or \
+            (pr.user.login in assignees and len(assignees) == 1) or \
+            ('Trivial' in labels or 'Hotfix' in labels):
+        # in case no assignees were set, the submitter is the only assignee,
+        # or the PR is a trivial change or a hotfix
+        prj['review_rule'] = "na"
+    else:
+        # everything else
+        prj['review_rule'] = "no"
+
+    # calculate the time the PR was in review, in hours and business days.
+    delta = pr.closed_at - pr.created_at
+    deltah = delta.total_seconds() / 3600
+    prj['hours_open'] = deltah
+
+    dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
+    # Get the number of business days per the guidelines, we need at least 2.
+    business_days = sum(1 for day in dates if day.weekday() < 5)
+    prj['business_days_open'] = business_days
+
+    # less than 2 business days ...
+    if business_days < 2 and not ('Trivial' in labels or 'Hotfix' in labels) or \
+            deltah < 4 and 'Trivial' in labels:
+        prj['time_rule'] = "no"
+    else:
+        prj['time_rule'] = "yes"
+
+    # This is all data we can get easily through the GitHub API and it serves
+    # as the basis for displaying some trends and metrics.
+    # Data can be extended in the future if we find more information that
+    # is useful through the API.
+    prj['nr'] = pr.number
+    prj['url'] = pr.url
+    prj['title'] = pr.title
+    prj['comments'] = pr.comments
+    prj['reviews'] = reviews.totalCount
+    prj['assignees'] = assignees
+    prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
+    prj['merged_by'] = pr.merged_by.login
+    prj['submitted_by'] = pr.user.login
+    prj['changed_files'] = pr.changed_files
+    prj['additions'] = pr.additions
+    prj['deletions'] = pr.deletions
+    prj['commits'] = pr.commits
+    # The branch we are targeting: main vs. release branches.
+    prj['base'] = pr.base.ref
+    # list all reviewers
+    prj['reviewers'] = list(reviewers)
+    prj['labels'] = labels
+
+    return prj
+
+
 def main():
     args = parse_args()
     token = os.environ.get('GITHUB_TOKEN')
@@ -46,112 +134,35 @@ def main():
     if args.pull_request:
         pr = gh_repo.get_pull(args.pull_request)
-        reviews = pr.get_reviews()
-        print(f'#{pr.number}: {pr.title} - {pr.comments} Comments, reviews: {reviews.totalCount}, {len(pr.assignees)} Assignees (Updated {pr.updated_at})')
-        assignee_reviews = 0
-        reviewers = set()
-        prj = {}
-        for r in reviews:
-            if r.user and r.state == 'APPROVED':
-                reviewers.add(r.user.login)
-            if pr.assignees and r.user:
-                for assignee in pr.assignees:
-                    if r.user.login == assignee.login:
-                        assignee_reviews = assignee_reviews + 1
-                        # was reviewed at least by one assignee
-                        prj['reviewed_by_assignee'] = "yes"
-
-        # This is all data we get easily though the Github API and serves as the basis
-        # for displaying some trends and metrics.
-        # Data can be extended in the future if we find more information that
-        # is useful through the API
-        prj['nr'] = pr.number
-        prj['url'] = pr.url
-        prj['title'] = pr.title
-        prj['comments'] = pr.comments
-        prj['reviews'] = reviews.totalCount
-        prj['assignees'] = len(pr.assignees)
-        prj['updated'] = pr.updated_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['created'] = pr.created_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['closed'] = pr.closed_at.strftime("%Y-%m-%d %H:%M:%S")
-        prj['merged_by'] = pr.merged_by.login
-        prj['submitted_by'] = pr.user.login
-        prj['changed_files'] = pr.changed_files
-        prj['additions'] = pr.additions
-        prj['deletions'] = pr.deletions
-        prj['commits'] = pr.commits
-        # The branch we are targeting. main vs release branches.
-        prj['base'] = pr.base.ref
-
-        ll = []
-        for l in pr.labels:
-            ll.append(l.name)
-        prj['labels'] = ll
-
-        # take first assignee, otherwise we have no assignees and this rule is not applicable
-        if pr.assignee:
-            prj['assignee'] = pr.assignee.login
-        else:
-            prj['assignee'] = "none"
-            prj['reviewed_by_assignee'] = "na"
-            prj['review_rule'] = "na"
-
-        # go through all assignees and check if anyone has approved and reset assignee to the one who approved
-        for assignee in pr.assignees:
-            if assignee.login in reviewers:
-                prj['assignee'] = assignee.login
-            elif assignee.login == pr.user.login:
-                prj['reviewed_by_assignee'] = "yes"
-
-        # list assignees for later checks
-        assignees = [a.login for a in pr.assignees]
-
-        # Deal with exceptions when assignee approval is not needed.
-        if 'Trivial' in ll or 'Hotfix' in ll:
-            prj['review_rule'] = "yes"
-        elif pr.merged_by.login in assignees:
-            prj['review_rule'] = "yes"
-        else:
-            prj['review_rule'] = "no"
-
-        prj['assignee_reviews'] = assignee_reviews
-
-        delta = pr.closed_at - pr.created_at
-        deltah = delta.total_seconds() / 3600
-        prj['hours_open'] = deltah
-
-        dates = (pr.created_at + timedelta(idx + 1) for idx in range((pr.closed_at - pr.created_at).days))
-        # Get number of business days per the guidelines, we need at least 2.
-        res = sum(1 for day in dates if day.weekday() < 5)
-
-        if res < 2 and not ('Trivial' in ll or 'Hotfix' in ll):
-            prj['time_rule'] = False
-        elif deltah < 4 and 'Trivial' in ll:
-            prj['time_rule'] = False
-        else:
-            prj['time_rule'] = True
-        prj['reviewers'] = list(reviewers)
-
+        prj = process_pr(pr)
         json_list.append(prj)
+    elif args.range:
+        query = f'repo:{args.repo} merged:{args.range} is:pr is:closed sort:updated-desc base:main'
+        prs = gh.search_issues(query=f'{query}')
+        for _pr in prs:
+            pr = gh_repo.get_pull(_pr.number)
+            prj = process_pr(pr)
+            json_list.append(prj)
 
-    # Send data over to elasticsearch.
-    es = Elasticsearch(
-        [os.environ['ELASTICSEARCH_SERVER']],
-        api_key=os.environ['ELASTICSEARCH_KEY'],
-        verify_certs=False
-        )
-
-    try:
-        index = os.environ['PR_STAT_ES_INDEX']
-        bulk(es, gendata(json_list, index))
-    except KeyError as e:
-        print(f"Error: {e} not set.")
-        print(json_list)
+    if json_list and not args.dry_run:
+        # Send data over to elasticsearch.
+        es = Elasticsearch(
+            [os.environ['ELASTICSEARCH_SERVER']],
+            api_key=os.environ['ELASTICSEARCH_KEY'],
+            verify_certs=False
+            )
+
+        try:
+            if args.es_index:
+                index = args.es_index
+            else:
+                index = os.environ['PR_STAT_ES_INDEX']
+            bulk(es, gendata(json_list, index))
+        except KeyError as e:
+            print(f"Error: {e} not set.")
+            print(json_list)
+
+    if args.dry_run:
+        pprint.pprint(json_list)
 
 if __name__ == "__main__":
     main()
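
As a usage note, here is a minimal sketch of how the collected records reach Elasticsearch through the bulk helper. It assumes gendata() also sets "_index" from its index argument (the diff only shows the "_source" line, but the bulk helper requires an index per action); the index name and the example record are placeholders:

    import os
    from elasticsearch import Elasticsearch
    from elasticsearch.helpers import bulk

    def gendata(data, index):
        # one bulk action per PR record
        for t in data:
            yield {
                "_index": index,
                "_source": t,
            }

    # example record shaped like process_pr() output (values are made up)
    json_list = [{"nr": 1234, "review_rule": "yes", "time_rule": "yes"}]

    es = Elasticsearch([os.environ['ELASTICSEARCH_SERVER']],
                       api_key=os.environ['ELASTICSEARCH_KEY'],
                       verify_certs=False)
    bulk(es, gendata(json_list, "pr-stats-example"))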