Skip to content

Commit 8c5b646

Browse files
Add disallowed words pre-commit check (#20)
# Motivation <!-- Why is this change necessary? --> # Content <!-- Please include a summary of the change --> # Testing <!-- How was the change tested? --> # Please check the following before marking your PR as ready for review - [ ] I have added tests for my changes - [ ] I have updated the documentation or added new documentation as needed - [ ] I have read and agree to the [Contributor License Agreement](../CLA.md) --------- Co-authored-by: Edward Li <[email protected]>
1 parent f4b8939 commit 8c5b646

File tree

4 files changed

+82
-0
lines changed

4 files changed

+82
-0
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ tests/codemod/repos/extra/*.json filter=lfs diff=lfs merge=lfs -text
33
**/expected_diff.patch filter=lfs diff=lfs merge=lfs -text
44
tests/codemod/repos/repos.json filter=lfs diff=lfs merge=lfs -text
55
tests/verified_codemods/** filter=lfs diff=lfs merge=lfs -text
6+
.github/disallowed-words.txt filter=lfs diff=lfs merge=lfs -text

.github/disallowed-words.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
version https://git-lfs.github.com/spec/v1
2+
oid sha256:ac0b7be00533f882fb174c498c27372fa78d3ec2d65c0e885475bf92b43b7303
3+
size 337

.pre-commit-config.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ repos:
7979
pass_filenames: false
8080
always_run: true
8181
entry: bash -c "uv run --frozen --all-extras --dev deptry src codegen-git/src --ignore DEP001"
82+
- repo: "local"
83+
hooks:
84+
- id: disallowed-words-check
85+
name: Check for disallowed words
86+
entry: scripts/disallowed-words-check.sh
87+
language: script
88+
files: '' # Check all files
8289
- repo: https://github.com/renovatebot/pre-commit-hooks
8390
rev: 39.120.3
8491
hooks:

scripts/disallowed-words-check.sh

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#!/usr/bin/env bash
#
# Pre-commit hook: block a commit when a staged file's name or contents
# contains any entry from the disallowed-words list.
#
# Usage:   scripts/disallowed-words-check.sh   (run from the repository root)
# Exit:    0 when no violations are found; 1 on any violation or when the
#          word list cannot be read (missing, empty, or an unsynced LFS pointer).
# Outputs: all diagnostics are written to stderr.

set -u

readonly DISALLOWED_WORDS_FILE=".github/disallowed-words.txt"

#######################################
# Print a fatal setup error and abort the commit.
# Arguments: $1 - description of what went wrong
# Outputs:   two-line error message on stderr
# Returns:   never; exits the script with status 1
#######################################
die() {
  printf 'ERROR: %s\n' "$1" >&2
  printf 'Cannot proceed with disallowed word checks.\n' >&2
  exit 1
}

# 1) If the file doesn't exist, fail the commit.
[[ -f "$DISALLOWED_WORDS_FILE" ]] || die "$DISALLOWED_WORDS_FILE not found."

# 2) If Git LFS isn't installed, fail the commit.
command -v git-lfs >/dev/null 2>&1 \
  || die "Git LFS not installed or not in PATH."

# 3) If the file is still an LFS pointer (not synced), fail the commit.
if grep -q -F -e "https://git-lfs.github.com/spec/v1" -- "$DISALLOWED_WORDS_FILE"; then
  die "$DISALLOWED_WORDS_FILE is an LFS pointer but not synced."
fi

# 4) Read the disallowed words (one per line), dropping blank lines.
#    Carriage returns are stripped so a CRLF-encoded list still matches;
#    otherwise every word would carry a trailing \r and silently never match.
DISALLOWED_WORDS="$(tr -d '\r' <"$DISALLOWED_WORDS_FILE" | grep -v '^[[:space:]]*$')"
[[ -n "$DISALLOWED_WORDS" ]] \
  || die "No disallowed words found in $DISALLOWED_WORDS_FILE."

# Build a single alternation regex WITHOUT word boundaries.
# NOTE: This is intentionally strict. For example, banning "cat" also flags
# "catastrophic". Each list entry is used as an extended regex, not a literal.
DISALLOWED_REGEX="($(printf '%s\n' "$DISALLOWED_WORDS" | paste -s -d '|' -))"

FAILED=0

# 5) Walk the staged files that were Added, Copied, Modified or Renamed.
#    --name-only reports the post-rename path, so a rename to a disallowed
#    name is caught. -z emits NUL-terminated, unquoted paths, so names with
#    spaces, globs or non-ASCII characters are handled verbatim.
# 6) For each file:
#    - Check the filename itself.
#    - Check the file contents (if it exists in the working tree).
while IFS= read -r -d '' FILE; do
  # The word list necessarily contains every disallowed word; scanning it
  # would make any update to the list impossible to commit.
  if [[ "$FILE" == "$DISALLOWED_WORDS_FILE" ]]; then
    continue
  fi

  FILENAME_MATCHES=$(printf '%s\n' "$FILE" | grep -i -E -o -e "$DISALLOWED_REGEX")
  if [[ -n "$FILENAME_MATCHES" ]]; then
    printf '%s\n' "ERROR: Filename '$FILE' contains these disallowed words:" >&2
    printf '%s\n' "$FILENAME_MATCHES" >&2
    FAILED=1
  fi

  if [[ -f "$FILE" ]]; then
    # -I skips binary files; -e / -- keep a pattern or path that starts
    # with '-' from being parsed as a grep option.
    CONTENT_MATCHES=$(grep -I -i -E -o -e "$DISALLOWED_REGEX" -- "$FILE" | sort -u)
    if [[ -n "$CONTENT_MATCHES" ]]; then
      printf '%s\n' "ERROR: File '$FILE' contains these disallowed words:" >&2
      printf '%s\n' "$CONTENT_MATCHES" >&2
      FAILED=1
    fi
  fi
done < <(git diff --cached --name-only --diff-filter=ACMR -z)
# To check ALL tracked files instead (CAUTION: SLOW), feed the loop from:
#   git ls-files -z

# 7) Block commit if any violations were found.
exit "$FAILED"

0 commit comments

Comments
 (0)