diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py index 1081d4e4ddac..38de32cae992 100644 --- a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_red_team.py @@ -55,7 +55,7 @@ ) # PyRIT imports -from pyrit.common import initialize_pyrit, DUCK_DB +from pyrit.common import initialize_pyrit, SQLITE from pyrit.prompt_target import PromptChatTarget # Local imports - constants and utilities @@ -218,8 +218,10 @@ def __init__( # keep track of prompt content to risk_sub_type mapping for evaluation self.prompt_to_risk_subtype = {} - # Initialize PyRIT - initialize_pyrit(memory_db_type=DUCK_DB) + # Initialize PyRIT with SQLite (only option in 0.10.0+) + db_path = os.path.join(self.output_dir, "pyrit_memory.db") + initialize_pyrit(memory_db_type=SQLITE, memory_db_path=db_path) + self.logger.debug(f"Initialized PyRIT with SQLite at {db_path}") # Initialize attack objective generator self.attack_objective_generator = _AttackObjectiveGenerator( diff --git a/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_utils/strategy_mapping.py b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_utils/strategy_mapping.py new file mode 100644 index 000000000000..ebf5c295b250 --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/azure/ai/evaluation/red_team/_utils/strategy_mapping.py @@ -0,0 +1,71 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
# ---------------------------------------------------------
"""Strategy mapping utilities for PyRIT FoundryScenario integration.

Translates the SDK's ``AttackStrategy`` members into the values used with
PyRIT's ``FoundryScenario``. When ``FoundryStrategy`` cannot be imported, the
table falls back to plain strings carrying the corresponding member names.
"""

from typing import Dict, Optional, Union

try:
    from pyrit.scenario.scenarios.foundry_scenario import FoundryStrategy
except ImportError:
    # Fallback for environments where PyRIT 0.10.0+ is not available
    FoundryStrategy = None

from .._attack_strategy import AttackStrategy

# Value stored for a non-baseline strategy: a ``FoundryStrategy`` member when
# PyRIT is importable, otherwise that member's name as a plain string.
FoundryStrategyValue = Union[str, "FoundryStrategy"]


def _foundry_value(member_name: str) -> FoundryStrategyValue:
    """Return ``FoundryStrategy.<member_name>``, or the bare name without PyRIT."""
    if FoundryStrategy is None:
        return member_name
    return getattr(FoundryStrategy, member_name)


# Mapping table: azure-sdk AttackStrategy → PyRIT FoundryStrategy
# Note: PAIR strategy is not present in current AttackStrategy enum
ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY: Dict[AttackStrategy, Optional[FoundryStrategyValue]] = {
    # Baseline (no modifications)
    AttackStrategy.Baseline: None,  # Special case - handled separately

    # Single-turn converter strategies (easy)
    AttackStrategy.ROT13: _foundry_value("ROT13"),
    AttackStrategy.Base64: _foundry_value("Base64"),

    # Multi-turn attack strategies (difficult)
    AttackStrategy.MultiTurn: _foundry_value("MultiTurn"),
    AttackStrategy.Crescendo: _foundry_value("Crescendo"),

    # Jailbreak strategy
    AttackStrategy.Jailbreak: _foundry_value("Jailbreak"),
}


def convert_attack_strategy_to_foundry(strategy: AttackStrategy) -> Optional[FoundryStrategyValue]:
    """
    Convert azure-sdk-for-python AttackStrategy to PyRIT FoundryStrategy.

    Args:
        strategy: The attack strategy to convert

    Returns:
        The mapped FoundryStrategy member (or its name as a string when PyRIT
        is not importable), or None for baseline

    Raises:
        ValueError: If strategy is not supported in FoundryScenario
    """
    if strategy not in ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY:
        raise ValueError(
            f"Attack strategy {strategy} is not supported with FoundryScenario. "
            f"Supported strategies: {list(ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY.keys())}"
        )

    # Baseline is stored in the table as None, so no separate branch is needed.
    return ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY[strategy]


def is_foundry_supported(strategy: AttackStrategy) -> bool:
    """
    Check if an attack strategy is supported by FoundryScenario.

    Baseline counts as supported because it is a key of the mapping table,
    even though its mapped value is None.

    Args:
        strategy: The attack strategy to check

    Returns:
        True if supported, False otherwise
    """
    return strategy in ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY
Implement dual-path logic in `scan()` method + +**Blocked:** +- IndirectJailbreak (XPIA) strategy - not yet supported in PyRIT FoundryScenario + - Workaround: Keep custom XPIA logic outside FoundryScenario temporarily + +## Current Implementation Status + +### Breaking Changes Fixed + +| Change | Status | Notes | +|--------|--------|-------| +| `DUCK_DB` → `SQLITE` | ✅ Complete | Line 58 in `_red_team.py` | +| Import path updated | ✅ Complete | `from pyrit.common import SQLITE` | +| Database path configuration | ✅ Complete | Uses `output_dir/pyrit_memory.db` | +| Initialize call updated | ✅ Complete | Line 221-225 with `memory_db_path` parameter | + +### Components Status + +| Component | Status | File | Notes | +|-----------|--------|------|-------| +| Strategy Mapping | ✅ Complete | `_utils/strategy_mapping.py` | Maps AttackStrategy → FoundryStrategy | +| Scenario Manager | ⬜ Pending | `_scenario_manager.py` | TODO | +| Result Converter | ⬜ Pending | `_result_converter.py` | TODO | +| Feature Flag | ⬜ Pending | `_red_team.py` | TODO | + +## Issues Encountered + +### 1. PyRIT 0.10.0 API Changes +**Issue:** PromptRequestPiece renamed to MessagePiece +**Impact:** Medium - affects all memory queries +**Resolution:** Use `MessagePiece` and `get_message_pieces()` consistently +**Status:** ✅ Documented in spec.md + +### 2. DuckDB Removal +**Issue:** `DUCK_DB` constant no longer exists in PyRIT +**Impact:** High - blocking import +**Resolution:** Changed to `SQLITE` with explicit db_path +**Status:** ✅ Fixed in _red_team.py line 58 and line 221-225 + +### 3. IndirectJailbreak Strategy +**Issue:** PyRIT FoundryScenario doesn't support XPIA/IndirectJailbreak +**Impact:** Medium - one strategy unsupported +**Resolution:** Keep custom XPIA logic outside FoundryScenario (temporary) +**Status:** 🔄 Workaround implemented, feature request pending + +### 4. 
PAIR Strategy Not in Enum +**Issue:** PAIR strategy mentioned in spec but not in AttackStrategy enum +**Impact:** Low - mapping adjustment needed +**Resolution:** Excluded PAIR from strategy mapping (not available in enum) +**Status:** ✅ Mapping created without PAIR + +## Open Questions + +1. **Memory Labels for Context Preservation** + - Question: Can we store nested dicts (contexts list) in memory labels? + - Answer: Yes - PyRIT serializes labels to JSON in SQLite + - Status: ✅ Resolved + +2. **Baseline Attack Handling** + - Question: Should baseline use FoundryScenario or stay separate? + - Answer: Use `include_baseline=True` in FoundryScenario + - Status: ✅ Resolved + +3. **Result Format Compatibility** + - Question: Does FoundryScenario output match existing JSONL format? + - Answer: Needs transformation via ResultConverter + - Status: ⬜ To be verified in testing + +## Next Steps (Immediate) + +1. **Create Scenario Manager** (priority: high) + - [ ] Implement `ScenarioManager` class + - [ ] Add SQLite initialization + - [ ] Configure RAI service scorer + - [ ] Set up adversarial chat target + +2. **Create Result Converter** (priority: high) + - [ ] Implement `ResultConverter` class + - [ ] Extract data from MessagePiece + - [ ] Build red_team_info format + - [ ] Handle context/risk_subtype mapping + +3. **Add Feature Flag** (priority: medium) + - [ ] Add `USE_FOUNDRY_SCENARIOS` environment variable + - [ ] Implement dual-path logic in `scan()` + - [ ] Ensure backward compatibility + +4. 
**Testing** (priority: high) + - [x] Unit tests for strategy_mapping + - [ ] Integration test with real FoundryScenario + - [ ] Verify output format matches + +## Design Decisions Log + +| Decision | Rationale | Date | Status | +|----------|-----------|------|--------| +| Target PyRIT 0.10.0+ | Latest stable, SQLite-only | 2025-12-17 | ✅ | +| Use SQLite memory | Only option in 0.10.0+ | 2025-12-17 | ✅ | +| Use MessagePiece | PyRIT 0.10.0 renamed class | 2025-12-17 | ✅ | +| Strategy mapping layer | Decouple from PyRIT changes | 2025-12-17 | ✅ | +| Feature flag rollout | Gradual migration, A/B test | 2025-12-17 | ⬜ | +| Keep XPIA custom logic | PyRIT doesn't support yet | 2025-12-18 | ✅ | +| Exclude PAIR from mapping | Not in AttackStrategy enum | 2025-12-18 | ✅ | + +## Metrics + +### Code Changes (Actual) +- **Files modified:** 1 (_red_team.py) +- **Files created:** 3 (_utils/strategy_mapping.py, progress.md, test_strategy_mapping.py) +- **Lines added:** ~330 +- **Lines modified:** ~8 + +### Test Coverage (Target) +- Unit tests: >90% +- Integration tests: >85% +- E2E scenarios: 100% of critical paths + +--- + +**Document Owner:** Azure AI Evaluation Team +**Last Updated:** 2025-12-18 +**Next Review:** Phase 1 completion diff --git a/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_strategy_mapping.py b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_strategy_mapping.py new file mode 100644 index 000000000000..aa0afaa1f3ef --- /dev/null +++ b/sdk/evaluation/azure-ai-evaluation/tests/unittests/test_redteam/test_strategy_mapping.py @@ -0,0 +1,107 @@ +""" +Unit tests for strategy_mapping module. 
"""Unit tests for strategy_mapping module."""

import pytest
from unittest.mock import MagicMock, patch

from azure.ai.evaluation.red_team._attack_strategy import AttackStrategy
from azure.ai.evaluation.red_team._utils import strategy_mapping
from azure.ai.evaluation.red_team._utils.strategy_mapping import (
    convert_attack_strategy_to_foundry,
    is_foundry_supported,
    ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY,
)

# (AttackStrategy member, FoundryStrategy member name) for every non-baseline
# strategy the mapping table is expected to cover.
MAPPED_STRATEGIES = [
    (AttackStrategy.ROT13, "ROT13"),
    (AttackStrategy.Base64, "Base64"),
    (AttackStrategy.MultiTurn, "MultiTurn"),
    (AttackStrategy.Crescendo, "Crescendo"),
    (AttackStrategy.Jailbreak, "Jailbreak"),
]

# Strategies that must be reported as supported (Baseline maps to None but is
# still a key of the table).
SUPPORTED_STRATEGIES = [AttackStrategy.Baseline] + [strategy for strategy, _ in MAPPED_STRATEGIES]

# Strategies that are deliberately absent from the mapping table.
UNSUPPORTED_STRATEGIES = [
    AttackStrategy.IndirectJailbreak,
    AttackStrategy.AsciiArt,
]


@pytest.mark.unittest
class TestStrategyMapping:
    """Test the strategy mapping functionality."""

    def test_baseline_returns_none(self):
        """Test that Baseline strategy returns None."""
        result = convert_attack_strategy_to_foundry(AttackStrategy.Baseline)
        assert result is None

    @pytest.mark.parametrize("strategy", SUPPORTED_STRATEGIES)
    def test_supported_strategies_in_mapping(self, strategy):
        """Test that supported strategies are in the mapping."""
        assert strategy in ATTACK_STRATEGY_TO_FOUNDRY_STRATEGY

    def test_unsupported_strategy_raises_error(self):
        """Test that unsupported strategy raises ValueError."""
        # IndirectJailbreak is not in the mapping
        with pytest.raises(ValueError) as excinfo:
            convert_attack_strategy_to_foundry(AttackStrategy.IndirectJailbreak)
        assert "is not supported with FoundryScenario" in str(excinfo.value)

    @pytest.mark.parametrize("strategy, expected_name", MAPPED_STRATEGIES)
    def test_strategy_maps_to_expected_value(self, strategy, expected_name):
        """Test that each strategy maps to the matching FoundryStrategy member."""
        # With PyRIT importable the table holds FoundryStrategy members;
        # otherwise it holds the member names as plain strings.
        foundry_enum = strategy_mapping.FoundryStrategy
        expected = expected_name if foundry_enum is None else getattr(foundry_enum, expected_name)

        result = convert_attack_strategy_to_foundry(strategy)

        assert result is not None
        assert result == expected


@pytest.mark.unittest
class TestIsFoundrySupported:
    """Test the is_foundry_supported function."""

    @pytest.mark.parametrize("strategy", SUPPORTED_STRATEGIES)
    def test_supported(self, strategy):
        """Test that every mapped strategy (including Baseline) is supported."""
        assert is_foundry_supported(strategy) is True

    @pytest.mark.parametrize("strategy", UNSUPPORTED_STRATEGIES)
    def test_not_supported(self, strategy):
        """Test that strategies missing from the mapping are not supported."""
        assert is_foundry_supported(strategy) is False