Testing Redaction
This guide shows how to verify that sensitive data is actually redacted before you deploy to production.
Basic Verification
Use capture_logs to test redaction:
import pytest
from fapilog import LoggerBuilder
from fapilog.testing import capture_logs
@pytest.mark.asyncio
async def test_password_is_redacted():
    """Check that a field listed for redaction is masked in the captured output.

    The logger is configured to redact the ``password`` field, one event
    carrying both a password and a username is logged, and the captured
    text is inspected afterwards.
    """
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(fields=["password"])
        logger = await builder.build_async()
        await logger.info("Login attempt", username="alice", password="hunter2")

    output = captured.text
    # The raw secret must be gone ...
    assert "hunter2" not in output
    # ... a mask marker must be present ...
    assert "***" in output
    # ... and the non-sensitive username must still be readable.
    assert "alice" in output
Testing Preset Coverage
Verify compliance presets cover expected fields:
@pytest.mark.asyncio
async def test_gdpr_preset_redacts_email():
    """Check that the ``GDPR_PII`` preset hides both the email and the name.

    Both values are passed as structured fields (keyword arguments), not as
    part of the message string.
    """
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        await logger.info("User signup", email="test@example.com", name="John Doe")

    output = captured.text
    assert "test@example.com" not in output
    # The name field counts as PII too, so it must be masked as well.
    assert "John Doe" not in output
@pytest.mark.asyncio
async def test_hipaa_preset_redacts_mrn():
    """Check that the ``HIPAA_PHI`` preset hides the ``mrn`` and ``ssn`` fields.

    A single event carrying a medical record number and a social security
    number is logged; neither raw value may appear in the captured text.
    """
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="HIPAA_PHI")
        logger = await builder.build_async()
        await logger.info("Patient visit", mrn="MRN-12345", ssn="123-45-6789")

    output = captured.text
    assert "MRN-12345" not in output
    assert "123-45-6789" not in output
Testing Pattern Matching
Verify regex patterns catch variations:
@pytest.mark.asyncio
async def test_pattern_catches_variations():
    """Check that a case-insensitive ``password`` pattern covers name variants.

    Three fields are logged; the two whose names contain "password" must be
    masked. The third (``old_passwd``) is logged but deliberately not
    asserted on.
    """
    auth_fields = {
        "user_password": "secret1",
        "password_hash": "abc123",
        "old_passwd": "secret2",
    }

    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(patterns=[r"(?i).*password.*"])
        logger = await builder.build_async()
        await logger.info("Auth data", **auth_fields)

    output = captured.text
    assert "secret1" not in output
    assert "abc123" not in output
    # Note: "passwd" does not contain "password", so the pattern above does
    # not cover old_passwd. Add a separate pattern if that variant matters.
Testing URL Credential Stripping
@pytest.mark.asyncio
async def test_url_credentials_stripped():
    """Check that userinfo embedded in a URL is removed without extra config.

    The logger is built with no redaction options at all, so this exercises
    whatever the builder enables by default.
    """
    connection_url = "postgres://admin:supersecret@db.example.com/app"

    async with capture_logs() as captured:
        logger = await LoggerBuilder().build_async()
        await logger.info("Database connection", url=connection_url)

    output = captured.text
    # Neither the password nor the "user:" prefix may survive.
    assert "supersecret" not in output
    assert "admin:" not in output
    # The host part of the URL must remain visible.
    assert "db.example.com" in output
Testing Limitations
Document expected behavior for unsupported scenarios:
@pytest.mark.asyncio
async def test_message_string_not_redacted():
    """Document that PII embedded in the message text is left untouched.

    This test passes when the email address IS present in the output: it
    pins down a known limitation rather than a desired outcome.
    """
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        # Anti-pattern on purpose: the PII is part of the message string
        # instead of being passed as a structured field.
        await logger.info("User email: test@example.com")

    # The address leaks through - that is the limitation being documented.
    assert "test@example.com" in captured.text
@pytest.mark.asyncio
async def test_arbitrary_field_name_not_redacted():
    """Document that fields with names unknown to the preset are not masked.

    This test passes when the email address IS present in the output: it
    pins down a known limitation rather than a desired outcome.
    """
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        # "customer_contact" is not a field name the GDPR preset knows about.
        await logger.info("Ticket", customer_contact="test@example.com")

    # Left in clear text because the field name does not match anything.
    assert "test@example.com" in captured.text
CI/CD Verification
Forbidden Patterns Test
Fail CI if sensitive patterns appear in logs:
import re
# Regular expressions describing text that must never appear in log output.
# Each entry is a raw-string pattern without capture groups.
FORBIDDEN_PATTERNS = [
r"\b[A-Za-z0-9]{32,}\b", # Long tokens: 32 or more consecutive alphanumerics
r"\b\d{3}-\d{2}-\d{4}\b", # SSN format: 3-2-4 digit groups separated by dashes
r"password\s*[:=]\s*\S+", # password=value or password: value pairs
r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+", # Email addresses
]
@pytest.mark.asyncio
async def test_no_sensitive_patterns_in_logs():
    """Fail when captured log text matches any entry in FORBIDDEN_PATTERNS.

    The body of the capture block is a placeholder: put the application code
    whose logging should be checked there.
    """
    async with capture_logs() as captured:
        # Run your application code here
        # ...
        pass

    output = captured.text
    for pattern in FORBIDDEN_PATTERNS:
        # Matching is case-insensitive for every pattern.
        matches = re.compile(pattern, re.IGNORECASE).findall(output)
        assert not matches, f"Sensitive pattern found: {pattern} -> {matches}"
Production Config Verification
Verify production configuration is correct:
from fapilog import LoggerBuilder
def test_production_preset_enables_redaction():
    """Check that the "production" preset lists the three expected redactors.

    The check reads the builder's private ``_config`` mapping and looks at
    the ``core.redactors`` entry; missing keys are treated as an empty list.
    """
    config = LoggerBuilder().with_preset("production")._config
    enabled_redactors = config.get("core", {}).get("redactors", [])

    # Verify each redactor is configured, in the same order as before.
    for redactor_name in ("field_mask", "regex_mask", "url_credentials"):
        assert redactor_name in enabled_redactors
Audit Testing
Generate evidence for compliance audits:
from fapilog import LoggerBuilder
def test_gdpr_preset_field_coverage():
    """Assert the ``GDPR_PII`` preset covers key fields and print the full list.

    The printed listing is intended as evidence for audit documentation.
    """
    preset_info = LoggerBuilder.get_redaction_preset_info("GDPR_PII")
    fields = set(preset_info["fields"])

    # Required field names, grouped by category and checked in this order.
    required_fields = (
        # Contact info
        "email",
        "phone",
        "address",
        # Personal identifiers
        "name",
        "dob",
        # Online identifiers
        "ip_address",
        "cookie_id",
    )
    for required in required_fields:
        assert required in fields

    # Print for audit documentation
    print(f"\nGDPR_PII covers {len(fields)} fields:")
    for field in sorted(fields):
        print(f" - {field}")
Integration Testing
Test redaction with actual sinks:
import json
import tempfile
from pathlib import Path
import pytest
from fapilog import LoggerBuilder
@pytest.mark.asyncio
async def test_file_sink_receives_redacted_data():
    """Check that what lands in a log file on disk is already redacted.

    A logger with the ``CREDENTIALS`` preset writes one event to ``app.log``
    in a temporary directory. The file is then read back and parsed as a
    single JSON document.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        builder = (
            LoggerBuilder()
            .with_redaction(preset="CREDENTIALS")
            .add_file(directory=tmpdir, filename="app.log")
        )
        logger = await builder.build_async()

        await logger.info("Auth event", password="secret123", user="alice")
        # Shut the logger down before reading the file back
        # (presumably this flushes pending output - confirm).
        await logger.shutdown()

        # Read and verify file contents while the directory still exists.
        raw_text = (Path(tmpdir) / "app.log").read_text()
        event_data = json.loads(raw_text.strip())["data"]

        assert event_data["password"] == "***"
        assert event_data["user"] == "alice"
        assert "secret123" not in raw_text
Checklist
Before deploying:
Test that each preset you use covers the expected fields
Test custom fields are redacted
Test patterns catch expected variations
Document known limitations (message strings, arbitrary fields)
Add forbidden pattern tests to CI
Verify production config enables redaction
Generate audit evidence for compliance