Skip to content

Add disallowed words pre-commit check #20

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ tests/codemod/repos/extra/*.json filter=lfs diff=lfs merge=lfs -text
**/expected_diff.patch filter=lfs diff=lfs merge=lfs -text
tests/codemod/repos/repos.json filter=lfs diff=lfs merge=lfs -text
tests/verified_codemods/** filter=lfs diff=lfs merge=lfs -text
.github/disallowed-words.txt filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions .github/disallowed-words.txt
Git LFS file not shown
7 changes: 7 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,13 @@ repos:
pass_filenames: false
always_run: true
entry: bash -c "uv run --frozen --all-extras --dev deptry src codegen-git/src --ignore DEP001"
# Local hook (defined in this repository rather than fetched from a remote
# hook repo): runs scripts/disallowed-words-check.sh as a plain script.
- repo: "local"
hooks:
- id: disallowed-words-check
name: Check for disallowed words
entry: scripts/disallowed-words-check.sh
language: script
# NOTE(review): the script builds its own file list from the staged diff
# (`git diff --cached`) and does not read its arguments, so this pattern does
# not narrow what gets scanned — confirm whether `pass_filenames: false` is wanted.
files: '' # Check all files
- repo: https://github.com/renovatebot/pre-commit-hooks
rev: 39.120.3
hooks:
Expand Down
71 changes: 71 additions & 0 deletions scripts/disallowed-words-check.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env bash
#
# Pre-commit check: block the commit when a staged (added or modified) file
# contains a disallowed word, either in its path or in its contents.
#
# The word list is read from .github/disallowed-words.txt (one entry per line,
# tracked with Git LFS). Entries are joined with '|' into a single
# case-insensitive extended regex, so every entry is interpreted as an ERE
# fragment and matches anywhere inside a longer word.
#
# Must be run from the repository root (the word list path is relative).
#
# Exit status:
#   0  no violations found
#   1  at least one violation, or the check could not run (word list missing,
#      git-lfs missing, word list still an LFS pointer, word list empty)

set -u

readonly DISALLOWED_WORDS_FILE=".github/disallowed-words.txt"

# Print an error plus the standard "cannot proceed" notice to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$1" >&2
  printf 'Cannot proceed with disallowed word checks.\n' >&2
  exit 1
}

# 1) If the file doesn't exist, fail the commit.
[[ -f "$DISALLOWED_WORDS_FILE" ]] || die "$DISALLOWED_WORDS_FILE not found."

# 2) If Git LFS isn't installed, fail the commit.
command -v git-lfs >/dev/null 2>&1 || die "Git LFS not installed or not in PATH."

# 3) If the file is still an LFS pointer (not synced), fail the commit.
#    -F: the spec URL is a literal string, not a pattern.
if grep -q -F -e "https://git-lfs.github.com/spec/v1" -- "$DISALLOWED_WORDS_FILE"; then
  die "$DISALLOWED_WORDS_FILE is an LFS pointer but not synced."
fi

# 4) Read the disallowed words (one per line), dropping blank lines.
#    Carriage returns are stripped so a CRLF-saved list still matches.
DISALLOWED_WORDS="$(tr -d '\r' <"$DISALLOWED_WORDS_FILE" | grep -v '^[[:space:]]*$')"
[[ -n "$DISALLOWED_WORDS" ]] || die "No disallowed words found in $DISALLOWED_WORDS_FILE."

# Build a single regex WITHOUT word boundaries.
# NOTE: This is intentionally strict. For example, banning "cat" also flags "catastrophic".
# Will tweak and change this to a more lenient regex later.
DISALLOWED_REGEX="($(printf '%s\n' "$DISALLOWED_WORDS" | paste -s -d '|' -))"

FAILED=0

# 5) Iterate over staged files that are Added (A) or Modified (M).
#    -z yields NUL-terminated, unquoted paths, so names containing spaces,
#    glob characters or non-ASCII bytes are handled verbatim.
# 6) For each file:
#    - Check the filename itself.
#    - Check the file contents (if it exists).
while IFS= read -r -d '' FILE; do
  # The word list necessarily contains every disallowed word; scanning it
  # would make it impossible to commit any change to the list.
  if [[ "$FILE" == "$DISALLOWED_WORDS_FILE" ]]; then
    continue
  fi

  FILENAME_MATCHES=$(printf '%s\n' "$FILE" | grep -i -E -o -e "$DISALLOWED_REGEX")
  if [[ -n "$FILENAME_MATCHES" ]]; then
    printf '%s\n' "ERROR: Filename '$FILE' contains these disallowed words:" >&2
    printf '%s\n' "$FILENAME_MATCHES" >&2
    FAILED=1
  fi

  if [[ -f "$FILE" ]]; then
    # -I skips binary files; `--` protects paths that start with '-'.
    CONTENT_MATCHES=$(grep -I -i -E -o -e "$DISALLOWED_REGEX" -- "$FILE" | sort -u)
    if [[ -n "$CONTENT_MATCHES" ]]; then
      printf '%s\n' "ERROR: File '$FILE' contains these disallowed words:" >&2
      printf '%s\n' "$CONTENT_MATCHES" >&2
      FAILED=1
    fi
  fi
done < <(git diff --cached --name-only --diff-filter=AM -z)
# To check ALL tracked files instead, feed the loop from `git ls-files -z` - CAUTION: SLOW!

# 7) Block commit if any violations were found.
exit "$FAILED"
Loading