[None][infra] Update the auto-community label action to be triggered every hour (#5658)

Signed-off-by: Po-Wei Wang (Vincent) <poweiw@nvidia.com>
This commit is contained in:
Po-Wei (Vincent) 2025-07-03 09:56:30 -07:00 committed by GitHub
parent 528ff52ef4
commit 0566fa1697
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 127 additions and 29 deletions

View File

@ -1,5 +1,6 @@
import os
import sys
from datetime import datetime, timedelta, timezone
import requests
@ -97,6 +98,73 @@ def add_label_to_pr(repo_owner: str, repo_name: str, pr_number: str,
raise e
def get_recent_open_prs(repo_owner: str,
                        repo_name: str,
                        minutes_back: int = 65,
                        max_pages: int = 10) -> list:
    """Get open PRs created or updated in the last N minutes.

    Pages through the repository's open pull requests, sorted by most
    recently updated first, and collects every PR whose ``created_at`` or
    ``updated_at`` timestamp falls inside the look-back window.

    Args:
        repo_owner: Owner (user or organization) of the repository.
        repo_name: Name of the repository.
        minutes_back: Size of the look-back window in minutes.
        max_pages: Safety limit on the number of result pages to request
            (100 PRs per page). At least one page is always requested.

    Returns:
        A list of PR objects (the decoded JSON dicts returned by the API),
        in the order the API returned them.

    Raises:
        requests.exceptions.RequestException: If any request fails or
            returns an error status; the error is logged and re-raised.
    """
    cutoff_time = datetime.now(timezone.utc) - timedelta(minutes=minutes_back)
    url = f"{GITHUB_API_URL}/repos/{repo_owner}/{repo_name}/pulls"
    per_page = 100
    base_params = {
        "state": "open",
        "sort": "updated",
        "direction": "desc",
        "per_page": per_page,
    }
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"

    recent_prs = []
    # Number of pages actually requested; also the 1-based index of the
    # page fetched most recently.
    pages_checked = 0

    try:
        while True:
            pages_checked += 1
            response = requests.get(url,
                                    headers=HEADERS,
                                    params={
                                        **base_params, "page": pages_checked
                                    },
                                    timeout=30)
            response.raise_for_status()
            page_prs = response.json()

            found_old_pr = False
            for pr in page_prs:
                created_at = datetime.strptime(
                    pr["created_at"],
                    timestamp_format).replace(tzinfo=timezone.utc)
                updated_at = datetime.strptime(
                    pr["updated_at"],
                    timestamp_format).replace(tzinfo=timezone.utc)

                if created_at >= cutoff_time or updated_at >= cutoff_time:
                    recent_prs.append(pr)
                else:
                    # since sorted by updated desc, once we hit an old PR we can stop
                    found_old_pr = True
                    break

            if found_old_pr:
                break

            # An empty or partially filled page is the last one, so there
            # is no need to spend another request on the page after it.
            if len(page_prs) < per_page:
                break

            # safety limit to avoid unbounded pagination
            if pages_checked >= max_pages:
                print(
                    f"Warning: Hit pagination limit at page {pages_checked}, may have missed some PRs"
                )
                break

        print(
            f"Found {len(recent_prs)} PRs created/updated in the last {minutes_back} minutes (checked {pages_checked} pages)"
        )
        return recent_prs

    except requests.exceptions.RequestException as e:
        print(f"Error fetching PRs: {e}")
        raise
def main():
"""
Main function to check user membership and apply community labels.
@ -106,45 +174,69 @@ def main():
1 - Failed to determine user membership (API permission issues)
2 - Failed to add community label (labeling API issues)
"""
pr_author = os.environ.get("PR_AUTHOR")
assert pr_author, "PR_AUTHOR environment variable not set"
pr_number = os.environ.get("PR_NUMBER")
assert pr_number, "PR_NUMBER environment variable not set"
repo_owner = os.environ.get("REPO_OWNER")
assert repo_owner, "REPO_OWNER environment variable not set"
repo_name = os.environ.get("REPO_NAME")
assert repo_name, "REPO_NAME environment variable not set"
community_label = os.environ.get("COMMUNITY_LABEL")
assert community_label, "COMMUNITY_LABEL environment variable not set"
time_window_minutes = int(os.environ.get("TIME_WINDOW_MINUTES"))
print(
f"Starting NVIDIA membership check for PR author '{pr_author}' on PR #{pr_number}."
f"Starting community PR labeling sweep for {repo_owner}/{repo_name}. Time window: {time_window_minutes} minutes."
)
try:
is_member = check_user_membership("NVIDIA", pr_author)
except RuntimeError as e:
print(
f"Critical error during NVIDIA membership check for '{pr_author}': {e}"
)
print("Halting script due to inability to determine membership status.")
recent_prs = get_recent_open_prs(repo_owner, repo_name,
time_window_minutes)
except requests.exceptions.RequestException:
print("Failed to fetch recent PRs")
sys.exit(1)
print(
f"User '{pr_author}' is determined to be an NVIDIA member: {is_member}")
processed_count = 0
labeled_count = 0
for pr in recent_prs:
pr_number = pr["number"]
pr_author = pr["user"]["login"]
existing_labels = {label["name"] for label in pr["labels"]}
if community_label in existing_labels:
print(
f"PR #{pr_number} by {pr_author} already has community label, skipping"
)
continue
print(f"Processing PR #{pr_number} by {pr_author}")
processed_count += 1
if not is_member:
print(
f"User '{pr_author}' is a community user. Adding label '{community_label}'."
)
try:
add_label_to_pr(repo_owner, repo_name, pr_number, community_label)
except requests.exceptions.RequestException as e:
print(f"Failed to add community label: {e}")
sys.exit(2)
else:
print(
f"User '{pr_author}' is an NVIDIA member. No label will be added.")
is_member = check_user_membership("NVIDIA", pr_author)
except RuntimeError as e:
print(
f"Critical error during NVIDIA membership check for '{pr_author}': {e}"
)
print("Continuing with next PR...")
continue
if not is_member:
print(
f"User '{pr_author}' is a community user. Adding label '{community_label}'."
)
try:
add_label_to_pr(repo_owner, repo_name, str(pr_number),
community_label)
labeled_count += 1
except requests.exceptions.RequestException as e:
print(f"Failed to add community label to PR #{pr_number}: {e}")
# continue with other PRs instead of exiting
continue
else:
print(f"User '{pr_author}' is an NVIDIA member. No label needed.")
print(
f"Sweep complete: processed {processed_count} PRs, labeled {labeled_count} as community"
)
if __name__ == "__main__":

View File

@ -1,8 +1,15 @@
name: Label Community PR
on:
pull_request:
types: [opened]
schedule:
- cron: '0 * * * *' # every hour at minute 0
workflow_dispatch: # manual trigger option
inputs:
time_window_minutes:
description: 'Time window in minutes to look back for PRs'
required: false
default: 65
type: number
jobs:
label_pr:
@ -22,9 +29,8 @@ jobs:
- name: Run labeling script
env:
AUTO_LABEL_COMMUNITY_TOKEN: ${{ secrets.AUTO_LABEL_COMMUNITY_TOKEN }}
PR_AUTHOR: ${{ github.event.pull_request.user.login }}
PR_NUMBER: ${{ github.event.pull_request.number }}
REPO_OWNER: ${{ github.event.repository.owner.login }}
REPO_OWNER: ${{ github.repository_owner }}
REPO_NAME: ${{ github.event.repository.name }}
COMMUNITY_LABEL: "Community want to contribute"
TIME_WINDOW_MINUTES: ${{ inputs.time_window_minutes || 65 }}
run: python .github/scripts/label_community_user.py