feat: pre-commit hook blocks sensitive files and key patterns
This commit is contained in:
parent
06624e020c
commit
3276ff4498
2 changed files with 126 additions and 0 deletions
76
.githooks/pre-commit
Executable file
76
.githooks/pre-commit
Executable file
|
|
@ -0,0 +1,76 @@
|
|||
#!/usr/bin/env bash
# .githooks/pre-commit — blocks sensitive files and API key patterns
#
# Strict mode: abort on unhandled errors (-e), on use of unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ANSI color sequences used in the hook's messages. They are stored as
# backslash escapes (not raw ESC bytes), so whatever prints them must
# interpret escapes (e.g. `echo -e` or printf's %b).
readonly RED='\033[0;31m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # reset / "no color"

# Repository-relative paths that must never be committed. A staged path is
# rejected only when it equals one of these entries exactly.
readonly -a BLOCKED_PATHS=(
  "config/user.yaml"
  "config/server.yaml"
  "config/llm.yaml"
  "config/notion.yaml"
  "config/adzuna.yaml"
  "config/label_tool.yaml"
  ".env"
)

# Extended regular expressions (grep -E syntax) for staged paths that must
# never be committed. The patterns are anchored at the end only, so
# "data/.*\.db$" also matches a data/ directory nested deeper in the tree.
readonly -a BLOCKED_PATTERNS=(
  "data/.*\.db$"
  "data/.*\.jsonl$"
  "demo/data/.*\.db$"
)

# Extended regular expressions (grep -E syntax) that flag likely secrets in
# staged content.
#
# Inside a bracket expression a backslash is an ordinary character, so:
#   * a literal hyphen must be placed last ("_-"); writing "\-_" instead
#     creates a range from "\" to "_" that does not contain "-";
#   * "\x27" is not a quote, it is the four characters \ x 2 7. The single
#     quote is therefore written literally, which is why the last pattern is
#     a double-quoted shell string.
KEY_REGEXES=(
  'sk-[A-Za-z0-9]{20,}'
  'Bearer [A-Za-z0-9_-]{20,}'
  "api_key:[[:space:]]*[\"']?[A-Za-z0-9_-]{16,}"
)

# ---------------------------------------------------------------------------
# Scan every staged path and reject the commit when a path is blocklisted,
# matches a sensitive path pattern, or its staged additions look like a key.
# Reads:  BLOCKED_PATHS, BLOCKED_PATTERNS, KEY_REGEXES, RED, YELLOW, NC
# Exits:  0 when nothing was flagged, 1 otherwise.
# ---------------------------------------------------------------------------
ERRORS=0

# Get list of staged files.
# EMPTY_TREE is the SHA-1 of git's empty tree: diffing against it lists
# everything staged for the very first commit, when HEAD does not exist yet.
# core.quotePath=false keeps non-ASCII paths unquoted so the comparisons
# below see the real path instead of a C-quoted one.
EMPTY_TREE="4b825dc642cb6eb9a060e54bf8d69288fbee4904"
mapfile -t staged_files < <(
  git -c core.quotePath=false diff-index --cached --name-only HEAD 2>/dev/null \
    || git -c core.quotePath=false diff-index --cached --name-only "$EMPTY_TREE"
)

# Nothing staged means nothing to check. Leaving here also avoids expanding
# an empty array under `set -u`, which is an error on bash older than 4.4.
if (( ${#staged_files[@]} == 0 )); then
  exit 0
fi

for file in "${staged_files[@]}"; do
  # Exact path blocklist
  for blocked in "${BLOCKED_PATHS[@]}"; do
    if [[ "$file" == "$blocked" ]]; then
      # %b expands the color escapes; %s prints the path verbatim, so a
      # backslash in a file name is not interpreted.
      printf '%bBLOCKED:%b %s is in the sensitive file blocklist.\n' \
        "$RED" "$NC" "$file"
      printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Pattern blocklist
  for pattern in "${BLOCKED_PATTERNS[@]}"; do
    if grep -qE -- "$pattern" <<<"$file"; then
      printf '%bBLOCKED:%b %s matches sensitive path pattern (%s).\n' \
        "$RED" "$NC" "$file" "$pattern"
      printf ' Add to .gitignore or: %bgit restore --staged %s%b\n' \
        "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Content scan for key patterns.
  # The staged diff is inspected for every path, whether or not the file
  # still exists in the working tree: what gets committed is the index.
  # Deleted or binary files simply contribute no added lines.
  # --no-color / --no-ext-diff keep user configuration from changing the
  # output format. Only the "+++ <path>" header is dropped, so an added line
  # whose own text starts with "++" is still scanned.
  # `|| true`: grep exits 1 when there are no added lines; that is not an
  # error here.
  staged_content=$(
    git diff --cached --no-color --no-ext-diff -- "$file" 2>/dev/null \
      | grep '^+' \
      | grep -v '^+++ ' \
      || true
  )
  for regex in "${KEY_REGEXES[@]}"; do
    # A here-string is used instead of `echo ... | grep -q`: with pipefail,
    # grep -q exiting early on a match can kill echo with SIGPIPE and make
    # the pipeline report failure, i.e. a match would be treated as no match.
    if grep -qE -- "$regex" <<<"$staged_content"; then
      printf '%bBLOCKED:%b %s appears to contain an API key or token.\n' \
        "$RED" "$NC" "$file"
      printf ' Pattern matched: %b%s%b\n' "$YELLOW" "$regex" "$NC"
      printf ' Review with: %bgit diff --cached -- %s%b\n' \
        "$YELLOW" "$file" "$NC"
      printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
      # One report per file is enough; skip the remaining regexes.
      break
    fi
  done
done

if (( ERRORS > 0 )); then
  echo ""
  printf '%bCommit blocked.%b Fix the issues above and try again.\n' \
    "$RED" "$NC"
  exit 1
fi
exit 0
50
tests/test_hooks.sh
Executable file
50
tests/test_hooks.sh
Executable file
|
|
@ -0,0 +1,50 @@
|
|||
#!/usr/bin/env bash
# tests/test_hooks.sh — exercises .githooks/pre-commit against a stubbed git.
set -euo pipefail

# Hook under test, relative to the repository root.
HOOK=".githooks/pre-commit"

# Resolve the repository root as the parent of this script's directory and
# run from there, so the relative HOOK path works from any starting directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$REPO_DIR"

# Report a passing check.
# Arguments: $1 - description of the check
pass() {
  printf ' PASS: %s\n' "$1"
}

# Report a failing check and abort the whole test run with status 1.
# Arguments: $1 - description of the failure
fail() {
  printf ' FAIL: %s\n' "$1"
  exit 1
}

#######################################
# Helper: run the hook against a fake staged file list.
# A stub `git` is placed first on PATH. It answers `diff-index` with the
# given path and `diff ... --cached` with the given content; any other git
# invocation is handed to the real git binary.
# Globals:   HOOK (read)
# Arguments: $1 - path to report as staged
#            $2 - text to report as the staged diff (optional, default empty)
# Outputs:   the hook's stdout and stderr, merged, on stdout
# Returns:   the hook's exit status
#######################################
run_hook_with() {
  local staged_file="$1"
  local staged_content="${2:-}"
  local tmpdir real_git
  local status=0

  # Resolve the real git before the stub shadows it. Inside the stub,
  # `command git` would find the stub again and recurse forever.
  real_git=$(command -v git || true)

  tmpdir=$(mktemp -d) || {
    echo "run_hook_with: mktemp failed" >&2
    exit 1
  }

  # Create shim that reports our file as staged.
  # The heredoc is quoted and the values travel through the environment, so
  # quotes, `$` or backticks in the arguments are passed through as data
  # instead of being spliced into the shim's source code.
  cat > "$tmpdir/git" <<'SHIM'
#!/usr/bin/env bash
if [[ "$*" == *"diff-index"* ]]; then
  printf '%s\n' "${SHIM_STAGED_FILE:-}"
elif [[ "$*" == *"diff"*"--cached"* ]]; then
  printf '%s\n' "${SHIM_STAGED_CONTENT:-}"
elif [[ -n "${SHIM_REAL_GIT:-}" ]]; then
  exec "$SHIM_REAL_GIT" "$@"
else
  echo "git shim: real git not found" >&2
  exit 127
fi
SHIM
  chmod +x "$tmpdir/git"

  # `|| status=$?` records a blocking hook without tripping `set -e`, so the
  # temporary directory is always removed.
  SHIM_STAGED_FILE="$staged_file" \
    SHIM_STAGED_CONTENT="$staged_content" \
    SHIM_REAL_GIT="$real_git" \
    PATH="$tmpdir:$PATH" \
    bash "$HOOK" 2>&1 || status=$?

  rm -rf -- "$tmpdir"
  return "$status"
}

# Each case runs the hook through run_hook_with and checks only its exit
# status. Explicit if/else is used instead of `a && b || c`, which would also
# run `c` whenever `b` itself returned non-zero.

echo "Test 1: blocks config/user.yaml"
if run_hook_with "config/user.yaml"; then
  fail "should have blocked"
else
  pass "blocked user.yaml"
fi

echo "Test 2: blocks .env"
if run_hook_with ".env"; then
  fail "should have blocked"
else
  pass "blocked .env"
fi

# NOTE(review): if the hook only scans content for paths that exist in the
# working tree, this case depends on app/app.py being present — confirm.
echo "Test 3: blocks content with OpenAI key pattern"
if run_hook_with "app/app.py" "+sk-abcdefghijklmnopqrstuvwxyz123456"; then
  fail "should have blocked key pattern"
else
  pass "blocked key pattern"
fi

echo "Test 4: allows safe file"
if run_hook_with "app/app.py" "import streamlit"; then
  pass "allowed safe file"
else
  fail "should have allowed safe file"
fi

echo "All pre-commit hook tests passed."
Loading…
Reference in a new issue