feat: pre-commit hook blocks sensitive files and key patterns
This commit is contained in:
parent
f1194cacc9
commit
7c87a7e6cf
2 changed files with 126 additions and 0 deletions
76
.githooks/pre-commit
Executable file
76
.githooks/pre-commit
Executable file
|
|
@ -0,0 +1,76 @@
|
||||||
|
#!/usr/bin/env bash
# .githooks/pre-commit
#
# Git pre-commit hook that blocks sensitive files and API key patterns
# from being committed.

# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI colour escape sequences. They are stored as literal backslash text and
# only become real escapes when the printing command interprets them.
NC='\033[0m'
RED='\033[0;31m'
YELLOW='\033[1;33m'
|
||||||
|
|
||||||
|
# Paths that must never be committed. Each staged path is compared against
# these entries for exact string equality.
BLOCKED_PATHS=()

# YAML files under config/.
BLOCKED_PATHS+=(
  "config/user.yaml"
  "config/server.yaml"
  "config/llm.yaml"
  "config/notion.yaml"
  "config/adzuna.yaml"
  "config/label_tool.yaml"
)

# Environment file at the top level.
BLOCKED_PATHS+=(".env")
|
||||||
|
|
||||||
|
# Extended regular expressions matched against every staged path.
# Each pattern is anchored at the end of the path ($) but not at the start,
# so a match may begin anywhere inside the path.
BLOCKED_PATTERNS=(
  'data/.*\.db$'
  'data/.*\.jsonl$'
  'demo/data/.*\.db$'
)
|
||||||
|
|
||||||
|
# Extended regular expressions (grep -E) that flag likely secrets in the
# lines added by a commit.
#
# Inside a POSIX bracket expression a backslash is an ordinary character, so
# escapes such as "\-" or "\x27" do not work there: a literal hyphen has to be
# written last in the class, and a single quote has to appear as itself.
KEY_REGEXES=(
  # "sk-" followed by at least 20 alphanumerics.
  'sk-[A-Za-z0-9]{20,}'
  # HTTP bearer token: at least 20 characters of [A-Za-z0-9_-].
  'Bearer [A-Za-z0-9_-]{20,}'
  # api_key: value, optionally opened by a double or single quote.
  "api_key:[[:space:]]*[\"']?[A-Za-z0-9_-]{16,}"
)
|
||||||
|
|
||||||
|
# Number of violations found so far; any non-zero value blocks the commit.
ERRORS=0

# SHA-1 of git's empty tree object. Used as the comparison base when the
# diff against HEAD fails (for example, before the first commit exists).
EMPTY_TREE="4b825dc642cb6eb9a060e54bf8d69288fbee4904"

#######################################
# Print the staged paths, one per line.
# Deletions are excluded (--diff-filter=d): removing a sensitive file from
# the index is the remedy this hook asks for, so it must not be blocked.
# Globals:   EMPTY_TREE (read)
# Outputs:   staged paths on stdout
# Returns:   status of the last git invocation
#######################################
list_staged_files() {
  git diff-index --cached --name-only --diff-filter=d HEAD 2>/dev/null \
    || git diff-index --cached --name-only --diff-filter=d "$EMPTY_TREE"
}

# Get list of staged files
mapfile -t staged_files < <(list_staged_files)
|
||||||
|
|
||||||
|
#######################################
# Check one staged path against the path blocklist, the path patterns and
# the secret regexes, reporting each violation on stdout.
# Globals:   BLOCKED_PATHS, BLOCKED_PATTERNS, KEY_REGEXES, RED, YELLOW, NC
#            (read); ERRORS (incremented once per violation)
# Arguments: $1 - staged path
# Outputs:   human-readable violation messages on stdout
# Returns:   0
#######################################
scan_staged_file() {
  local file=$1
  local blocked pattern regex staged_content

  # Exact path blocklist
  for blocked in "${BLOCKED_PATHS[@]}"; do
    if [[ "$file" == "$blocked" ]]; then
      # printf with %s keeps backslashes in file names literal (echo -e
      # would interpret them); %b expands only the colour escapes.
      printf '%bBLOCKED:%b %s is in the sensitive file blocklist.\n' \
        "$RED" "$NC" "$file"
      printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Pattern blocklist
  for pattern in "${BLOCKED_PATTERNS[@]}"; do
    if grep -qE -- "$pattern" <<<"$file"; then
      printf '%bBLOCKED:%b %s matches sensitive path pattern (%s).\n' \
        "$RED" "$NC" "$file" "$pattern"
      printf ' Add to .gitignore or: %bgit restore --staged %s%b\n' \
        "$YELLOW" "$file" "$NC"
      ERRORS=$((ERRORS + 1))
    fi
  done

  # Content scan for key patterns (only when the path exists as a regular
  # file in the working tree)
  if [[ -f "$file" ]]; then
    # Keep only added lines, dropping the "+++" file header. "|| true"
    # covers the case where grep selects nothing.
    staged_content=$(git diff --cached -- "$file" 2>/dev/null \
      | grep '^+' | grep -v '^+++' || true)
    for regex in "${KEY_REGEXES[@]}"; do
      # Here-string instead of `echo | grep -q`: with pipefail, grep -q
      # exiting on an early match can kill echo with SIGPIPE on a large
      # diff, turning a real match into a "no match" pipeline status.
      if grep -qE -- "$regex" <<<"$staged_content"; then
        printf '%bBLOCKED:%b %s appears to contain an API key or token.\n' \
          "$RED" "$NC" "$file"
        printf ' Pattern matched: %b%s%b\n' "$YELLOW" "$regex" "$NC"
        printf ' Review with: %bgit diff --cached -- %s%b\n' \
          "$YELLOW" "$file" "$NC"
        printf ' Use: %bgit restore --staged %s%b\n' "$YELLOW" "$file" "$NC"
        ERRORS=$((ERRORS + 1))
        # One report per file is enough.
        break
      fi
    done
  fi

  return 0
}

# The ${arr[@]+...} form keeps an empty staged list from tripping `set -u`
# on bash versions older than 4.4.
for file in ${staged_files[@]+"${staged_files[@]}"}; do
  scan_staged_file "$file"
done
|
||||||
|
|
||||||
|
# Final verdict: with no recorded violation the commit proceeds (status 0);
# otherwise print a summary and abort the commit with status 1.
if (( ERRORS <= 0 )); then
  exit 0
fi

printf '\n'
echo -e "${RED}Commit blocked.${NC} Fix the issues above and try again."
exit 1
|
||||||
50
tests/test_hooks.sh
Executable file
50
tests/test_hooks.sh
Executable file
|
|
@ -0,0 +1,50 @@
|
||||||
|
#!/usr/bin/env bash
# Tests for the pre-commit hook.

# Strict mode: abort on command failure, unset variables and failed pipelines.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and its parent (REPO_DIR). All tests run
# from REPO_DIR so that the relative HOOK path below resolves.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_DIR="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${REPO_DIR}"

# Hook under test, relative to REPO_DIR.
HOOK=".githooks/pre-commit"
|
||||||
|
|
||||||
|
# Report a successful check.
# Arguments: $1 - description of what passed
pass() {
  printf ' PASS: %s\n' "$1"
}

# Report a failed check and abort the whole test run with status 1.
# Arguments: $1 - description of what failed
fail() {
  printf ' FAIL: %s\n' "$1"
  exit 1
}
|
||||||
|
|
||||||
|
#######################################
# Helper: run hook against a fake staged file list.
# A stub `git` is placed first on PATH; it answers `diff-index` with the
# given path and `diff ... --cached` with the given text, and hands every
# other call to the real git.
# Globals:   HOOK (read) - path of the hook script to execute
# Arguments: $1 - path to report as staged
#            $2 - text to return as the staged diff (optional, default empty)
# Outputs:   the hook's stdout and stderr, merged, on stdout
# Returns:   the hook's exit status
#######################################
run_hook_with() {
  local staged_file="$1"
  local staged_content="${2:-}"
  local tmpdir
  local real_git
  local status=0

  tmpdir=$(mktemp -d)

  # Resolve the real git before the shim shadows it on PATH; otherwise the
  # shim's fallback would find itself and recurse without end.
  real_git=$(type -P git || true)

  # Hand the fake data to the shim through files instead of interpolating it
  # into the shim's source, so quotes, `$` and backticks stay literal.
  printf '%s\n' "$staged_file" > "$tmpdir/staged_files"
  printf '%s\n' "$staged_content" > "$tmpdir/staged_diff"

  # Create shim that reports our file as staged (quoted delimiter: nothing
  # is expanded while the shim is written).
  cat > "$tmpdir/git" <<'SHIM'
#!/usr/bin/env bash
if [[ "$*" == *"diff-index"* ]]; then
  cat "$HOOK_SHIM_DIR/staged_files"
elif [[ "$*" == *"diff"*"--cached"* ]]; then
  cat "$HOOK_SHIM_DIR/staged_diff"
elif [[ -n "${HOOK_REAL_GIT:-}" ]]; then
  exec "$HOOK_REAL_GIT" "$@"
else
  echo "git shim: no real git available for: $*" >&2
  exit 127
fi
SHIM
  chmod +x "$tmpdir/git"

  # Capture the status with `||` so that `set -e` cannot abort before the
  # temporary directory is removed.
  HOOK_SHIM_DIR="$tmpdir" HOOK_REAL_GIT="$real_git" PATH="$tmpdir:$PATH" \
    bash "$HOOK" 2>&1 || status=$?

  rm -rf -- "$tmpdir"
  return "$status"
}
|
||||||
|
|
||||||
|
#######################################
# Run the hook for one fake staged file and compare its exit status with
# the expected one. The hook exits 1 when it blocks a commit and 0 when it
# allows it; any other status (crash, missing hook) counts as a failure
# instead of being mistaken for "blocked".
# Arguments: $1 - expected exit status
#            $2 - message for pass()
#            $3 - message for fail()
#            $4 - staged path
#            $5 - staged diff text (optional)
#######################################
expect_status() {
  local want=$1
  local ok_msg=$2
  local fail_msg=$3
  shift 3
  local got=0

  run_hook_with "$@" || got=$?
  if [[ "$got" -eq "$want" ]]; then
    pass "$ok_msg"
  else
    fail "$fail_msg (hook exited $got, expected $want)"
  fi
}

echo "Test 1: blocks config/user.yaml"
expect_status 1 "blocked user.yaml" "should have blocked" "config/user.yaml"

echo "Test 2: blocks .env"
expect_status 1 "blocked .env" "should have blocked" ".env"

echo "Test 3: blocks content with OpenAI key pattern"
expect_status 1 "blocked key pattern" "should have blocked key pattern" \
  "app/app.py" "+sk-abcdefghijklmnopqrstuvwxyz123456"

echo "Test 4: allows safe file"
expect_status 0 "allowed safe file" "should have allowed safe file" \
  "app/app.py" "import streamlit"

echo "All pre-commit hook tests passed."
|
||||||
Loading…
Reference in a new issue