feat: pre-commit hook blocks sensitive files and key patterns

This commit is contained in:
pyr0ball 2026-03-02 19:02:15 -08:00
parent f1194cacc9
commit 7c87a7e6cf
2 changed files with 126 additions and 0 deletions

76
.githooks/pre-commit Executable file
View file

@ -0,0 +1,76 @@
#!/usr/bin/env bash
# .githooks/pre-commit — blocks sensitive files and API key patterns
#
# Configuration section: strict mode, output colours, and the three
# blocklists consulted for every staged path.
set -euo pipefail

# ANSI colour escapes; stored with literal backslashes, so they must be
# printed through `echo -e` or printf's %b.
RED='\033[0;31m'; YELLOW='\033[1;33m'; NC='\033[0m'
readonly RED YELLOW NC

# Paths that must never be committed (compared for exact equality).
BLOCKED_PATHS=(
  "config/user.yaml"
  "config/server.yaml"
  "config/llm.yaml"
  "config/notion.yaml"
  "config/adzuna.yaml"
  "config/label_tool.yaml"
  ".env"
)

# Extended regexes (grep -E) matched against each staged path.
BLOCKED_PATTERNS=(
  "data/.*\.db$"
  "data/.*\.jsonl$"
  "demo/data/.*\.db$"
)

# Extended regexes (grep -E) matched against added lines of the staged diff.
# Inside a POSIX bracket expression a backslash is an ordinary character, so
# "-" is listed last in the class to make it literal, and the single quote is
# written as itself rather than as an escape sequence.
KEY_REGEXES=(
  'sk-[A-Za-z0-9]{20,}'
  'Bearer [A-Za-z0-9_-]{20,}'
  "api_key:[[:space:]]*[\"']?[A-Za-z0-9_-]{16,}"
)
readonly BLOCKED_PATHS BLOCKED_PATTERNS KEY_REGEXES

# Number of violations found so far; non-zero blocks the commit.
ERRORS=0
# Get list of staged files.
# Compare the index against HEAD; when HEAD does not resolve (e.g. the very
# first commit) fall back to Git's well-known empty-tree object.
# --diff-filter=ACMRT keeps added/copied/modified/renamed/type-changed paths
# and drops deletions, so removing a blocklisted file from the repository
# (the recommended remedy) is not itself rejected by this hook.
EMPTY_TREE="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
mapfile -t staged_files < <(
  git diff-index --cached --name-only --diff-filter=ACMRT HEAD 2>/dev/null \
    || git diff-index --cached --name-only --diff-filter=ACMRT "$EMPTY_TREE"
)
# Check every staged path against the exact-path blocklist, the path-pattern
# blocklist, and (for files present in the working tree) scan the added lines
# of its staged diff for key-like strings. Each hit increments ERRORS.
#
# The ${arr[@]+...} form keeps an empty staged list from tripping `set -u`
# on bash releases older than 4.4.
for file in ${staged_files[@]+"${staged_files[@]}"}; do
  # Exact path blocklist
  for blocked in "${BLOCKED_PATHS[@]}"; do
    if [[ "$file" == "$blocked" ]]; then
      printf '%bBLOCKED:%b %s is in the sensitive file blocklist.\n' "$RED" "$NC" "$file"
      printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Pattern blocklist
  for pattern in "${BLOCKED_PATTERNS[@]}"; do
    if grep -qE -- "$pattern" <<<"$file"; then
      printf '%bBLOCKED:%b %s matches sensitive path pattern (%s).\n' "$RED" "$NC" "$file" "$pattern"
      printf ' Add to .gitignore or: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Content scan for key patterns (only on existing staged files)
  if [[ -f "$file" ]]; then
    # Keep only added lines ("+...") and drop the "+++ b/file" header.
    # `|| true`: grep exits 1 when nothing is selected, which is not an error.
    staged_content=$(git diff --cached -- "$file" 2>/dev/null | grep '^+' | grep -v '^+++' || true)
    for regex in "${KEY_REGEXES[@]}"; do
      # Feed grep through a here-string instead of `echo | grep -q`: under
      # pipefail, grep -q exiting early on a large diff would kill the writer
      # with SIGPIPE and turn a real match into a "no match" result.
      if grep -qE -- "$regex" <<<"$staged_content"; then
        printf '%bBLOCKED:%b %s appears to contain an API key or token.\n' "$RED" "$NC" "$file"
        printf ' Pattern matched: %b%s%b\n' "$YELLOW" "$regex" "$NC"
        printf ' Review with: %bgit diff --cached -- %s%b\n' "$YELLOW" "$file" "$NC"
        printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
        ERRORS=$((ERRORS + 1))
        # One report per file is enough.
        break
      fi
    done
  fi
done
# Final verdict: a clean run exits 0 so the commit proceeds; any recorded
# violation prints a summary and exits 1 so Git aborts the commit.
if (( ERRORS == 0 )); then
  exit 0
fi
echo ""
echo -e "${RED}Commit blocked.${NC} Fix the issues above and try again."
exit 1

50
tests/test_hooks.sh Executable file
View file

@ -0,0 +1,50 @@
#!/usr/bin/env bash
# Test preamble: enable strict mode and run from the directory above this
# script, so the relative HOOK path below resolves regardless of where the
# script was invoked from.
set -e
set -u
set -o pipefail

# Hook under test, relative to REPO_DIR.
HOOK=".githooks/pre-commit"

# Absolute directory holding this script, and its parent directory.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
REPO_DIR="$(
  cd "${SCRIPT_DIR}/.." && pwd
)"

cd "${REPO_DIR}"
# Report a passing check on stdout.
# Arguments: $1 - short description of what passed
pass() {
  printf ' PASS: %s\n' "$1"
}
# Report a failing check on stdout and terminate the shell with status 1.
# Arguments: $1 - short description of what went wrong
fail() {
  printf ' FAIL: %s\n' "$1"
  exit 1
}
# Helper: run the hook against a fake staged file list.
#
# A throw-away `git` shim is placed first on PATH. It answers `diff-index`
# with the given path and `diff --cached` with the given content; every other
# git invocation is forwarded to the real git binary.
#
# Globals:   HOOK (read) - path of the hook script to execute
# Arguments: $1 - path to report as staged
#            $2 - text to report as the staged diff (optional, default empty)
# Outputs:   the hook's stdout and stderr, merged on stdout
# Returns:   the hook's exit status
run_hook_with() {
  local staged_file="$1"
  local staged_content="${2:-}"
  local tmpdir real_git status=0

  # Without a scratch directory the test result would be meaningless, so
  # abort instead of reporting a bogus hook status.
  tmpdir=$(mktemp -d) || { echo "run_hook_with: mktemp failed" >&2; exit 2; }

  # Resolve the real git BEFORE the shim shadows it on PATH; calling
  # `command git` from inside the shim would find the shim again and recurse.
  real_git=$(command -v git || true)

  # Hand the fake data over as files rather than splicing it into the shim's
  # source, so quotes, `$` and backticks in test data are passed verbatim.
  printf '%s\n' "$staged_file" > "$tmpdir/staged_files"
  printf '%s\n' "$staged_content" > "$tmpdir/staged_content"

  # Create shim that reports our file as staged (quoted delimiter: nothing in
  # the shim body is expanded here).
  cat > "$tmpdir/git" <<'SHIM'
#!/usr/bin/env bash
shim_dir="${BASH_SOURCE[0]%/*}"
if [[ "$*" == *"diff-index"* ]]; then
  cat -- "$shim_dir/staged_files"
elif [[ "$*" == *"diff"*"--cached"* ]]; then
  cat -- "$shim_dir/staged_content"
else
  exec "${REAL_GIT:?real git executable not found}" "$@"
fi
SHIM
  chmod +x "$tmpdir/git"

  REAL_GIT="$real_git" PATH="$tmpdir:$PATH" bash "$HOOK" 2>&1 || status=$?

  rm -rf -- "$tmpdir"
  return "$status"
}
# Test cases. Each one feeds a fake staged path (and optionally fake diff
# content) to the hook and checks whether the commit is blocked.
# Explicit if/else is used rather than `cmd && a || b`, where `b` would also
# run if `a` itself returned non-zero.

echo "Test 1: blocks config/user.yaml"
if run_hook_with "config/user.yaml"; then
  fail "should have blocked"
else
  pass "blocked user.yaml"
fi

echo "Test 2: blocks .env"
if run_hook_with ".env"; then
  fail "should have blocked"
else
  pass "blocked .env"
fi

# NOTE(review): the hook scans content only for paths that exist in the
# working tree (`-f "$file"`), so this test presumably relies on app/app.py
# being present in the repository — confirm.
echo "Test 3: blocks content with OpenAI key pattern"
if run_hook_with "app/app.py" "+sk-abcdefghijklmnopqrstuvwxyz123456"; then
  fail "should have blocked key pattern"
else
  pass "blocked key pattern"
fi

echo "Test 4: allows safe file"
if run_hook_with "app/app.py" "import streamlit"; then
  pass "allowed safe file"
else
  fail "should have allowed safe file"
fi

echo "All pre-commit hook tests passed."