Skip to content

Commit 9d84fb2

Browse files
authored
build: add script and workflow to label potentially duplicate PRs
PR-URL: #6081 Closes: stdlib-js/metr-issue-tracker#51 Reviewed-by: Athan Reines <[email protected]>
1 parent f5e0d67 commit 9d84fb2

File tree

2 files changed

+313
-0
lines changed

2 files changed

+313
-0
lines changed
Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,74 @@
1+
#/
2+
# @license Apache-2.0
3+
#
4+
# Copyright (c) 2025 The Stdlib Authors.
5+
#
6+
# Licensed under the Apache License, Version 2.0 (the "License");
7+
# you may not use this file except in compliance with the License.
8+
# You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
#/
18+
19+
# Name identifying the workflow:
name: check_duplicate_prs

# Events which start the workflow:
on:
  # Scheduled run, once a day at 03:00 UTC:
  schedule:
    - cron: '0 3 * * *'

  # Manual run on request:
  workflow_dispatch:

# Permissions applied to the whole workflow:
permissions:
  # Repository contents may be read, but not written:
  contents: read

# Jobs which make up the workflow:
jobs:

  # Job which checks for pull requests that may duplicate one another...
  check_duplicates:

    # Human-readable job name:
    name: 'Check Duplicate PRs'

    # Skip the job when the workflow is running in a fork:
    if: github.repository == 'stdlib-js/stdlib'

    # Virtual host machine on which the job runs:
    runs-on: ubuntu-latest

    # Steps executed, in order, by the job...
    steps:
      # Step 1: check out the repository.
      - name: 'Checkout repository'
        # The action is pinned to a full-length commit SHA:
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          # Do not remove untracked files before checking out:
          clean: false

          # Only fetch the most recent commit:
          fetch-depth: 1

          # Do not download Git-LFS files:
          lfs: false
        timeout-minutes: 10

      # Step 2: run the duplicate PR check script.
      - name: 'Check for duplicate PRs'
        env:
          GITHUB_TOKEN: ${{ secrets.STDLIB_BOT_PAT_REPO_WRITE }}
        run: |
          . "$GITHUB_WORKSPACE/.github/workflows/scripts/check_duplicate_prs"
        timeout-minutes: 15
Lines changed: 239 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,239 @@
1+
#!/usr/bin/env bash
2+
#
3+
# @license Apache-2.0
4+
#
5+
# Copyright (c) 2025 The Stdlib Authors.
6+
#
7+
# Licensed under the Apache License, Version 2.0 (the "License");
8+
# you may not use this file except in compliance with the License.
9+
# You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing, software
14+
# distributed under the License is distributed on an "AS IS" BASIS,
15+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
# See the License for the specific language governing permissions and
17+
# limitations under the License.
18+
19+
# Script to identify potentially duplicate pull requests based on the issues they resolve.
20+
#
21+
# Usage: check_duplicate_prs
22+
#
23+
# Environment variables:
24+
#
25+
# GITHUB_TOKEN GitHub token for authentication.
26+
27+
# shellcheck disable=SC2317
28+
29+
# Make a pipeline report failure whenever any one of its commands fails, rather than only the last one:
set -o pipefail


# VARIABLES #

# Base URL to which all API endpoints are appended:
github_api_url='https://api.github.com'

# Owner and name of the repository whose pull requests are inspected:
repo_owner='stdlib-js'
repo_name='stdlib'

# Name of the label which is added to, or removed from, potentially duplicate PRs:
duplicate_label='Potential Duplicate'
44+
45+
46+
# FUNCTIONS #
47+
48+
# Reports a failure on standard error and terminates the shell.
#
# $1 - status with which to exit
on_error() {
	local status="$1"

	printf '%s\n' 'ERROR: An error was encountered during execution.' >&2
	exit "${status}"
}
55+
56+
# Writes a success notice to standard error.
print_success() {
	printf '%s\n' 'Success!' >&2
}
60+
61+
# Sends a request to the GitHub API using curl and writes the response body to standard output.
#
# The request URL is formed by appending the endpoint to `github_api_url`. When
# `GITHUB_TOKEN` is non-empty, it is sent in an `Authorization` header.
#
# $1 - HTTP method (GET, POST, PATCH, etc.)
# $2 - API endpoint
# $3 - data for POST/PATCH requests
github_api() {
	local http_method="$1"
	local api_path="$2"
	local payload="$3"

	# Assemble the full curl argument list once, appending optional pieces as needed:
	local -a curl_args=(-s -X "${http_method}")

	# Only authenticate when a token is available:
	if [[ -n "${GITHUB_TOKEN}" ]]; then
		curl_args+=(-H "Authorization: token ${GITHUB_TOKEN}")
	fi

	# Every non-GET request declares a JSON content type:
	if [[ "${http_method}" != "GET" ]]; then
		curl_args+=(-H "Content-Type: application/json")
	fi

	# Only attach a request body when one was provided:
	if [[ -n "${payload}" ]]; then
		curl_args+=(-d "${payload}")
	fi

	curl "${curl_args[@]}" "${github_api_url}${api_path}"
}
91+
92+
# Extracts issue numbers resolved/closed in PRs for stdlib-js/stdlib.
#
# A reference is recognized when one of GitHub's closing keywords (close, closes,
# closed, fix, fixes, fixed, resolve, resolves, resolved; case-insensitive),
# optionally followed by a colon, precedes either `#<number>` or a full
# `https://github.com/stdlib-js/stdlib/issues/<number>` URL on the same line.
#
# Prints the unique issue numbers, one per line, sorted lexicographically.
# Prints nothing (and returns a non-zero status) when no reference is found.
#
# $1 - PR body text
extract_resolved_issues() {
	local body="$1"
	local keywords='close|closes|closed|fix|fixes|fixed|resolve|resolves|resolved'
	local references='#[0-9]+|https?://github\.com/stdlib-js/stdlib/issues/[0-9]+'

	# The leading `(^|[^[:alnum:]_])` group requires the keyword to start a word, so
	# that text such as "hotfix #3" or "prefix #3" is not mistaken for "fix #3".
	# The second grep keeps only the trailing digits of each match:
	printf '%s\n' "$body" \
		| grep -Eio "(^|[^[:alnum:]_])(${keywords}):?[[:space:]]*(${references})" \
		| grep -Eo '[0-9]+$' \
		| sort -u
}
100+
101+
# Removes a label from a PR.
#
# The label name becomes part of the request URL path, so spaces are
# percent-encoded first (e.g., "Potential Duplicate" => "Potential%20Duplicate");
# a raw space would otherwise yield a malformed URL.
#
# Removal is best-effort: a failed request never causes this function to fail.
#
# $1 - PR number
# $2 - label name
remove_label() {
	local pr_number="$1"
	local label="$2"
	local encoded_label="${label// /%20}"

	github_api "DELETE" "/repos/${repo_owner}/${repo_name}/issues/${pr_number}/labels/${encoded_label}" || true
}
111+
112+
# Tests whether a value is present in a list.
#
# Returns 0 when the value equals one of the list elements and 1 otherwise.
#
# $1 - value to search for
# $2... - list elements
list_contains() {
	local needle="$1"
	local item
	shift
	for item in "$@"; do
		if [ "$item" = "$needle" ]; then
			return 0
		fi
	done
	return 1
}

# Fetches every page of a paginated list endpoint and prints the merged JSON array.
#
# Pages are requested (100 items per page) until an empty page is returned.
# Returns 1, after printing a message to standard error, as soon as a response is
# not a JSON array (e.g., an API error object or an empty response), so that a
# failing API can never cause an endless loop.
#
# $1 - API endpoint which already contains a query string (pagination parameters are appended using `&`)
fetch_all_pages() {
	local endpoint="$1"
	local merged="[]"
	local page=1
	local page_data
	local page_count

	while true; do
		page_data=$(github_api "GET" "${endpoint}&per_page=100&page=${page}")

		# Yields the item count for an array and nothing for any other (or unparsable) response:
		page_count=$(printf '%s\n' "$page_data" | jq 'if type == "array" then length else empty end' 2>/dev/null)
		if [ -z "$page_count" ]; then
			echo "Invalid response when fetching ${endpoint} (page ${page}): ${page_data}" >&2
			return 1
		fi
		if [ "$page_count" -eq 0 ]; then
			# No more results...
			break
		fi

		# Merge results with the accumulated results:
		merged=$(printf '%s\n%s\n' "$merged" "$page_data" | jq -s '.[0] + .[1]')

		# Move to the next page:
		page=$((page + 1))
	done
	printf '%s\n' "$merged"
}

# Main execution sequence.
#
# 1. Fetches all open PRs and all open PRs currently carrying the duplicate label.
# 2. Maps every issue referenced via a closing keyword to the PRs referencing it.
# 3. Treats every PR sharing an issue with at least one other PR as a potential duplicate.
# 4. Removes the label from labeled PRs which are no longer potential duplicates
#    and adds it to potential duplicates which are not yet labeled.
#
# Exits the shell with status 0 on success and with status 1 when the list of open PRs cannot be fetched.
main() {
	local open_prs
	local pr_count
	local processed
	local encoded_label
	local labeled_prs_data
	local pr
	local pr_number
	local pr_body
	local resolved_issues
	local issue
	local index
	local i

	# Parallel arrays mapping an issue number (key) to a space-separated list of PR numbers (value):
	local -a issue_prs_keys=()
	local -a issue_prs_values=()

	# PRs which currently have the label and PRs which ought to have it:
	local -a labeled_prs_list=()
	local -a should_be_labeled_list=()
	local -a prs=()

	echo "Fetching open pull requests..."

	# Get all open PRs with pagination:
	open_prs=$(fetch_all_pages "/repos/${repo_owner}/${repo_name}/pulls?state=open") || on_error 1

	# Check if we found any PRs:
	pr_count=$(printf '%s\n' "$open_prs" | jq length)
	if [ "$pr_count" -eq 0 ]; then
		echo "No open pull requests found."
		print_success
		exit 0
	fi

	echo "Found ${pr_count} open pull requests."

	# Get all open items which have the duplicate label:
	echo "Fetching PRs with duplicate label..."
	encoded_label=${duplicate_label// /%20}
	if ! labeled_prs_data=$(fetch_all_pages "/repos/${repo_owner}/${repo_name}/issues?labels=${encoded_label}&state=open"); then
		# Best-effort: continue as if nothing were labeled (no labels are removed in this case)...
		echo "Warning: Unable to fetch labeled PRs; assuming no PRs are currently labeled." >&2
		labeled_prs_data="[]"
	fi

	# The issues endpoint returns both issues and PRs; only PRs (identified by a
	# `pull_request` key) are managed here, so that the label is never stripped from a plain issue:
	while IFS= read -r pr_number; do
		labeled_prs_list+=("$pr_number")
	done < <(printf '%s\n' "$labeled_prs_data" | jq -r '.[] | select(has("pull_request")) | .number')
	echo "Found ${#labeled_prs_list[@]} PRs with duplicate label"

	# Process each PR to build issue mappings:
	echo "Processing PRs for issue references..."
	processed=0
	while IFS= read -r pr; do
		pr_number=$(printf '%s\n' "$pr" | jq -r '.number')

		# A PR without a description has a `null` body; treat it as empty text:
		pr_body=$(printf '%s\n' "$pr" | jq -r '.body // ""')
		resolved_issues=$(extract_resolved_issues "$pr_body")

		processed=$((processed + 1))
		if [ $((processed % 50)) -eq 0 ]; then
			echo "Processed ${processed} PRs..."
		fi

		# Word splitting is intended: the issue numbers are separated by newlines...
		for issue in $resolved_issues; do
			# Find existing issue index:
			index=-1
			for i in "${!issue_prs_keys[@]}"; do
				if [ "${issue_prs_keys[$i]}" = "$issue" ]; then
					index=$i
					break
				fi
			done
			if [ "$index" -eq -1 ]; then
				issue_prs_keys+=("$issue")
				issue_prs_values+=("$pr_number")
			else
				issue_prs_values[index]="${issue_prs_values[index]} $pr_number"
			fi
		done
	done < <(printf '%s\n' "$open_prs" | jq -c '.[]')

	# Process the mappings to find duplicates (i.e., issues referenced by more than one PR):
	for i in "${!issue_prs_keys[@]}"; do
		read -r -a prs <<< "${issue_prs_values[$i]}"
		if [ ${#prs[@]} -gt 1 ]; then
			for pr in "${prs[@]}"; do
				# A PR may share several issues with other PRs; list it only once:
				if ! list_contains "$pr" "${should_be_labeled_list[@]}"; then
					should_be_labeled_list+=("$pr")
				fi
			done
		fi
	done

	echo "PRs that should have label: ${should_be_labeled_list[*]}"
	echo "PRs that currently have label: ${labeled_prs_list[*]}"

	# Remove the label from PRs which are no longer potential duplicates:
	for pr in "${labeled_prs_list[@]}"; do
		echo "Checking if PR #${pr} should still have label..."
		if ! list_contains "$pr" "${should_be_labeled_list[@]}"; then
			echo "Removing duplicate label from PR #${pr}..."
			remove_label "$pr" "$duplicate_label"
		fi
	done

	# Add the label to potential duplicates which do not yet have it:
	for pr in "${should_be_labeled_list[@]}"; do
		echo "Checking if PR #${pr} needs label..."
		if ! list_contains "$pr" "${labeled_prs_list[@]}"; then
			echo "Adding duplicate label to PR #${pr}..."
			github_api "POST" "/repos/${repo_owner}/${repo_name}/issues/${pr}/labels" \
				"{\"labels\":[\"${duplicate_label}\"]}" \
				|| echo "Warning: Failed to add duplicate label to PR #${pr}." >&2
		else
			echo "PR #${pr} already has label, skipping..."
		fi
	done

	print_success
	exit 0
}
237+
238+
# Kick off the main execution sequence:
main

0 commit comments

Comments
 (0)