# Testing Redaction

This guide shows how to verify that sensitive data is actually redacted before you deploy to production.

## Basic Verification

Use `capture_logs` to test redaction:

```python
import pytest
from fapilog import LoggerBuilder
from fapilog.testing import capture_logs


@pytest.mark.asyncio
async def test_password_is_redacted():
    """A field named in ``with_redaction(fields=...)`` is masked in the output."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(fields=["password"])
        logger = await builder.build_async()
        await logger.info("Login attempt", username="alice", password="hunter2")

    output = captured.text
    # The secret value itself must be absent...
    assert "hunter2" not in output
    # ...the mask marker must be present in its place...
    assert "***" in output
    # ...and fields that were not configured for redaction stay readable.
    assert "alice" in output
```

## Testing Preset Coverage

Verify compliance presets cover expected fields:

```python
@pytest.mark.asyncio
async def test_gdpr_preset_redacts_email():
    """With the GDPR_PII preset, neither the email nor the name value is logged."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        await logger.info("User signup", email="test@example.com", name="John Doe")

    output = captured.text
    assert "test@example.com" not in output
    # The name field counts as PII too, so its value must also be gone.
    assert "John Doe" not in output


@pytest.mark.asyncio
async def test_hipaa_preset_redacts_mrn():
    """With the HIPAA_PHI preset, neither the MRN nor the SSN value is logged."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="HIPAA_PHI")
        logger = await builder.build_async()
        await logger.info("Patient visit", mrn="MRN-12345", ssn="123-45-6789")

    output = captured.text
    # Both identifiers passed as fields must be missing from the output.
    assert "MRN-12345" not in output
    assert "123-45-6789" not in output
```

## Testing Pattern Matching

Verify regex patterns catch variations:

```python
@pytest.mark.asyncio
async def test_pattern_catches_variations():
    """A case-insensitive "password" pattern covers differently named fields."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(patterns=[r"(?i).*password.*"])
        logger = await builder.build_async()
        await logger.info(
            "Auth data",
            user_password="secret1",
            password_hash="abc123",
            old_passwd="secret2",
        )

    output = captured.text
    assert "secret1" not in output
    assert "abc123" not in output
    # "secret2" is deliberately not asserted: the pattern requires the literal
    # text "password", which the name "old_passwd" does not contain.
    # Add a separate pattern if you need to cover that spelling.
```

## Testing URL Credential Stripping

```python
@pytest.mark.asyncio
async def test_url_credentials_stripped():
    """A logger built with no redaction options still strips URL credentials."""
    async with capture_logs() as captured:
        builder = LoggerBuilder()
        logger = await builder.build_async()
        await logger.info(
            "Database connection",
            url="postgres://admin:supersecret@db.example.com/app",
        )

    output = captured.text
    # Neither the password nor the "user:" prefix may survive.
    assert "supersecret" not in output
    assert "admin:" not in output
    # The host part of the URL is still logged.
    assert "db.example.com" in output
```

## Testing Limitations

Document expected behavior for unsupported scenarios:

```python
@pytest.mark.asyncio
async def test_message_string_not_redacted():
    """Known limitation: PII embedded in the message text is NOT redacted."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        # WRONG way to log PII: the address is part of the message string
        # instead of being passed as a separate field.
        await logger.info("User email: test@example.com")

    # The address IS exposed; this assertion pins the limitation on purpose.
    output = captured.text
    assert "test@example.com" in output


@pytest.mark.asyncio
async def test_arbitrary_field_name_not_redacted():
    """Known limitation: a field name the preset does not list is NOT redacted."""
    async with capture_logs() as captured:
        builder = LoggerBuilder().with_redaction(preset="GDPR_PII")
        logger = await builder.build_async()
        # "customer_contact" is not one of the GDPR preset's field names.
        await logger.info("Ticket", customer_contact="test@example.com")

    # The value is still present because the field name did not match.
    output = captured.text
    assert "test@example.com" in output
```

## CI/CD Verification

### Forbidden Patterns Test

Fail CI if sensitive patterns appear in logs:

```python
import re

# Regular expressions that must never match the captured log output.
# Extend or tune this list for the secret formats your application handles.
FORBIDDEN_PATTERNS = [
    r"\b[A-Za-z0-9]{32,}\b",      # Long tokens: 32+ consecutive alphanumerics
    r"\b\d{3}-\d{2}-\d{4}\b",     # SSN format: ddd-dd-dddd
    r"password\s*[:=]\s*\S+",     # password=value or password: value
    r"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+",  # Email addresses
]


@pytest.mark.asyncio
async def test_no_sensitive_patterns_in_logs():
    """Fail when any entry of FORBIDDEN_PATTERNS matches the captured output."""
    async with capture_logs() as captured:
        # Placeholder: exercise your application code here so that it
        # emits the log lines you want scanned.
        pass

    output = captured.text
    for forbidden in FORBIDDEN_PATTERNS:
        # Matching is case-insensitive; every hit is reported in the message.
        hits = re.compile(forbidden, re.IGNORECASE).findall(output)
        assert not hits, f"Sensitive pattern found: {forbidden} -> {hits}"
```

### Production Config Verification

Verify production configuration is correct:

```python
from fapilog import LoggerBuilder


def test_production_preset_enables_redaction():
    """The "production" preset must list each of the three expected redactors."""
    config = LoggerBuilder().with_preset("production")._config

    for redactor in ("field_mask", "regex_mask", "url_credentials"):
        # Missing "core" or "redactors" entries fall back to empty containers,
        # so an absent section fails the membership check instead of raising.
        assert redactor in config.get("core", {}).get("redactors", [])
```

## Audit Testing

Generate evidence for compliance audits:

```python
from fapilog import LoggerBuilder


def test_gdpr_preset_field_coverage():
    """Check required GDPR_PII fields and print the full list as audit evidence."""
    preset_info = LoggerBuilder.get_redaction_preset_info("GDPR_PII")
    fields = set(preset_info["fields"])

    # Field names the preset is required to include, grouped by category.
    required_by_category = (
        ("email", "phone", "address"),  # contact info
        ("name", "dob"),  # personal identifiers
        ("ip_address", "cookie_id"),  # online identifiers
    )
    for category in required_by_category:
        for required in category:
            assert required in fields

    # Emit the complete, sorted field list for the audit documentation.
    print(f"\nGDPR_PII covers {len(fields)} fields:")
    for covered in sorted(fields):
        print(f"  - {covered}")
```

## Integration Testing

Test redaction with actual sinks:

```python
import json
import tempfile
from pathlib import Path

import pytest
from fapilog import LoggerBuilder


@pytest.mark.asyncio
async def test_file_sink_receives_redacted_data():
    """Redaction must already be applied to what the file sink writes to disk."""
    with tempfile.TemporaryDirectory() as tmpdir:
        log_file = Path(tmpdir) / "app.log"

        builder = LoggerBuilder().with_redaction(preset="CREDENTIALS")
        builder = builder.add_file(directory=tmpdir, filename="app.log")
        logger = await builder.build_async()

        await logger.info("Auth event", password="secret123", user="alice")
        # Shut the logger down before reading the file back.
        await logger.shutdown()

        # The file is expected to hold a single JSON document.
        raw = log_file.read_text()
        data = json.loads(raw.strip())["data"]

        assert data["password"] == "***"
        assert data["user"] == "alice"
        # The secret must not appear anywhere in the raw file either.
        assert "secret123" not in raw
```

## Checklist

Before deploying:

- [ ] Test that each preset you use covers the expected fields
- [ ] Test custom fields are redacted
- [ ] Test patterns catch expected variations
- [ ] Document known limitations (message strings, arbitrary fields)
- [ ] Add forbidden pattern tests to CI
- [ ] Verify production config enables redaction
- [ ] Generate audit evidence for compliance

## Related

- [Presets Reference](presets.md) - Complete field lists
- [Behavior](behavior.md) - What gets redacted and when
- [Compliance Cookbook](../cookbook/compliance-redaction.md) - What works and what doesn't