Skip to content

Commit 3d79fef

Browse files
committed
feat: add .gitignore pattern support for repository analysis
- Implement hybrid .gitignore processing using git check-ignore with pathspec fallback
- Add --respect-gitignore CLI option to both config and generate commands
- Update configuration models to store gitignore preference persistently
- Enhance RepoAnalyzer with gitignore pattern matching and priority logic
- Add comprehensive test suite for gitignore verification including negation patterns
- Update documentation with detailed pattern behavior and processing logic
- Add pathspec dependency for robust gitignore pattern matching

The feature respects .gitignore patterns during file analysis while maintaining proper priority:
1. Git ignore patterns are checked first
2. User CLI exclude patterns override git tracking
3. Default ignore patterns are applied last
4. Include patterns filter the remaining files
1 parent 87b96f5 commit 3d79fef

File tree

12 files changed

+289
-38
lines changed

12 files changed

+289
-38
lines changed

README.md

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ codewiki config set \
108108
# Configure max token settings
109109
codewiki config set --max-tokens 32768 --max-token-per-module 36369 --max-token-per-leaf-module 16000
110110

111-
# Configure max depth for hierarchical decomposition
112-
codewiki config set --max-depth 3
111+
# Configure max depth for hierarchical decomposition and .gitignore support
112+
codewiki config set --max-depth 3 --respect-gitignore
113113

114114
# Show current configuration
115115
codewiki config show
@@ -137,16 +137,16 @@ codewiki generate --github-pages
137137
codewiki generate --verbose
138138

139139
# Full-featured generation
140-
codewiki generate --create-branch --github-pages --verbose
140+
codewiki generate --create-branch --github-pages --verbose --respect-gitignore
141141
```
142142

143143
### Customization Options
144144

145145
CodeWiki supports customization for language-specific projects and documentation styles:
146146

147147
```bash
148-
# C# project: only analyze .cs files, exclude test directories
149-
codewiki generate --include "*.cs" --exclude "Tests,Specs,*.test.cs"
148+
# C# project: only analyze .cs files, exclude test directories, respect .gitignore
149+
codewiki generate --include "*.cs" --exclude "Tests,Specs,*.test.cs" --respect-gitignore
150150

151151
# Focus on specific modules with architecture-style docs
152152
codewiki generate --focus "src/core,src/api" --doc-type architecture
@@ -157,7 +157,7 @@ codewiki generate --instructions "Focus on public APIs and include usage example
157157

158158
#### Pattern Behavior (Important!)
159159

160-
- **`--include`**: When specified, **ONLY** these patterns are used (replaces defaults completely)
160+
- **`--include`**: When specified, **ONLY** files matching these patterns are kept from the files that remain after exclusion (include is applied after exclude)
161161
- Example: `--include "*.cs"` will analyze ONLY `.cs` files
162162
- If omitted, all supported file types are analyzed
163163
- Supports glob patterns: `*.py`, `src/**/*.ts`, `*.{js,jsx}`
@@ -170,6 +170,14 @@ codewiki generate --instructions "Focus on public APIs and include usage example
170170
- Glob patterns: `*.test.js`, `*_test.py`, `*.min.*`
171171
- Directory patterns: `build/`, `dist/`, `coverage/`
172172

173+
- **`--respect-gitignore`**: Respect `.gitignore` patterns
174+
- **Hybrid**: Uses `git check-ignore` for full recursive accuracy, falls back to pathspec if git is unavailable
175+
- **Processing Logic**:
176+
1. **Git Check**: If matched by `.gitignore` → **Excluded**
177+
2. **User Exclude**: If matched by CLI `--exclude` → **Excluded** (Overrides Git tracking)
178+
3. **Defaults**: If no match above → Check default ignore patterns
179+
4. **Inclusion**: Final check against `--include` patterns (if specified)
180+
173181
#### Setting Persistent Defaults
174182

175183
Save your preferred settings as defaults:
@@ -202,6 +210,7 @@ codewiki config agent --clear
202210
| `--doc-type` | Documentation style | Standalone option | `api`, `architecture`, `user-guide`, `developer` |
203211
| `--instructions` | Custom agent instructions | Standalone option | Free-form text |
204212

213+
205214
### Token Settings
206215

207216
CodeWiki allows you to configure maximum token limits for LLM calls. This is useful for:

codewiki/cli/adapters/doc_generator.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ def generate(self) -> DocumentationJob:
141141
max_token_per_module=self.config.get('max_token_per_module', 36369),
142142
max_token_per_leaf_module=self.config.get('max_token_per_leaf_module', 16000),
143143
max_depth=self.config.get('max_depth', 2),
144+
respect_gitignore=self.config.get('respect_gitignore', False),
144145
agent_instructions=self.config.get('agent_instructions')
145146
)
146147

codewiki/cli/commands/config.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,12 @@ def config_group():
8383
type=int,
8484
help="Maximum depth for hierarchical decomposition (default: 2)"
8585
)
86+
@click.option(
87+
'--respect-gitignore',
88+
is_flag=True,
89+
default=None,
90+
help='Respect .gitignore patterns during analysis'
91+
)
8692
def config_set(
8793
api_key: Optional[str],
8894
base_url: Optional[str],
@@ -92,7 +98,8 @@ def config_set(
9298
max_tokens: Optional[int],
9399
max_token_per_module: Optional[int],
94100
max_token_per_leaf_module: Optional[int],
95-
max_depth: Optional[int]
101+
max_depth: Optional[int],
102+
respect_gitignore: Optional[bool]
96103
):
97104
"""
98105
Set configuration values for CodeWiki.
@@ -127,7 +134,7 @@ def config_set(
127134
"""
128135
try:
129136
# Check if at least one option is provided
130-
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth]):
137+
if not any([api_key, base_url, main_model, cluster_model, fallback_model, max_tokens, max_token_per_module, max_token_per_leaf_module, max_depth, respect_gitignore is not None]):
131138
click.echo("No options provided. Use --help for usage information.")
132139
sys.exit(EXIT_CONFIG_ERROR)
133140

@@ -169,6 +176,9 @@ def config_set(
169176
raise ConfigurationError("max_depth must be a positive integer")
170177
validated_data['max_depth'] = max_depth
171178

179+
if respect_gitignore is not None:
180+
validated_data['respect_gitignore'] = respect_gitignore
181+
172182
# Create config manager and save
173183
manager = ConfigManager()
174184
manager.load() # Load existing config if present
@@ -182,7 +192,8 @@ def config_set(
182192
max_tokens=validated_data.get('max_tokens'),
183193
max_token_per_module=validated_data.get('max_token_per_module'),
184194
max_token_per_leaf_module=validated_data.get('max_token_per_leaf_module'),
185-
max_depth=validated_data.get('max_depth')
195+
max_depth=validated_data.get('max_depth'),
196+
respect_gitignore=validated_data.get('respect_gitignore')
186197
)
187198

188199
# Display success messages
@@ -231,6 +242,9 @@ def config_set(
231242
if max_depth:
232243
click.secho(f"✓ Max depth: {max_depth}", fg="green")
233244

245+
if respect_gitignore is not None:
246+
click.secho(f"✓ Respect gitignore: {respect_gitignore}", fg="green")
247+
234248
click.echo("\n" + click.style("Configuration updated successfully.", fg="green", bold=True))
235249

236250
except ConfigurationError as e:
@@ -291,6 +305,7 @@ def config_show(output_json: bool):
291305
"max_token_per_module": config.max_token_per_module if config else 36369,
292306
"max_token_per_leaf_module": config.max_token_per_leaf_module if config else 16000,
293307
"max_depth": config.max_depth if config else 2,
308+
"respect_gitignore": config.respect_gitignore if config else False,
294309
"agent_instructions": config.agent_instructions.to_dict() if config and config.agent_instructions else {},
295310
"config_file": str(manager.config_file_path)
296311
}
@@ -335,7 +350,7 @@ def config_show(output_json: bool):
335350
click.secho("Decomposition Settings", fg="cyan", bold=True)
336351
if config:
337352
click.echo(f" Max Depth: {config.max_depth}")
338-
353+
click.echo(f" Respect Gitignore: {config.respect_gitignore}")
339354
click.echo()
340355
click.secho("Agent Instructions", fg="cyan", bold=True)
341356
if config and config.agent_instructions and not config.agent_instructions.is_empty():

codewiki/cli/commands/generate.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ def parse_patterns(patterns_str: str) -> List[str]:
126126
default=None,
127127
help="Maximum depth for hierarchical decomposition (overrides config)",
128128
)
129+
@click.option(
130+
'--respect-gitignore',
131+
is_flag=True,
132+
default=None,
133+
help='Respect .gitignore patterns during analysis'
134+
)
129135
@click.pass_context
130136
def generate_command(
131137
ctx,
@@ -142,7 +148,8 @@ def generate_command(
142148
max_tokens: Optional[int],
143149
max_token_per_module: Optional[int],
144150
max_token_per_leaf_module: Optional[int],
145-
max_depth: Optional[int]
151+
max_depth: Optional[int],
152+
respect_gitignore: Optional[bool]
146153
):
147154
"""
148155
Generate comprehensive documentation for a code repository.
@@ -290,7 +297,8 @@ def generate_command(
290297
create_branch=create_branch,
291298
github_pages=github_pages,
292299
no_cache=no_cache,
293-
custom_output=output if output != "docs" else None
300+
custom_output=output if output != "docs" else None,
301+
respect_gitignore=respect_gitignore if respect_gitignore is not None else config.respect_gitignore
294302
)
295303

296304
# Create runtime agent instructions from CLI options
@@ -322,10 +330,12 @@ def generate_command(
322330
effective_max_token_per_module = max_token_per_module if max_token_per_module is not None else config.max_token_per_module
323331
effective_max_token_per_leaf = max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module
324332
effective_max_depth = max_depth if max_depth is not None else config.max_depth
333+
effective_respect_gitignore = respect_gitignore if respect_gitignore is not None else config.respect_gitignore
325334
logger.debug(f"Max tokens: {effective_max_tokens}")
326335
logger.debug(f"Max token/module: {effective_max_token_per_module}")
327336
logger.debug(f"Max token/leaf module: {effective_max_token_per_leaf}")
328337
logger.debug(f"Max depth: {effective_max_depth}")
338+
logger.debug(f"Respect gitignore: {effective_respect_gitignore}")
329339

330340
# Get agent instructions (merge runtime with persistent)
331341
agent_instructions_dict = None
@@ -359,6 +369,8 @@ def generate_command(
359369
'max_token_per_leaf_module': max_token_per_leaf_module if max_token_per_leaf_module is not None else config.max_token_per_leaf_module,
360370
# Max depth setting (runtime override takes precedence)
361371
'max_depth': max_depth if max_depth is not None else config.max_depth,
372+
# Gitignore setting (runtime override takes precedence)
373+
'respect_gitignore': respect_gitignore if respect_gitignore is not None else config.respect_gitignore,
362374
},
363375
verbose=verbose,
364376
generate_html=github_pages

codewiki/cli/config_manager.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ def save(
9292
max_tokens: Optional[int] = None,
9393
max_token_per_module: Optional[int] = None,
9494
max_token_per_leaf_module: Optional[int] = None,
95-
max_depth: Optional[int] = None
95+
max_depth: Optional[int] = None,
96+
respect_gitignore: Optional[bool] = None,
9697
):
9798
"""
9899
Save configuration to file and keyring.
@@ -108,6 +109,7 @@ def save(
108109
max_token_per_module: Maximum tokens per module for clustering
109110
max_token_per_leaf_module: Maximum tokens per leaf module
110111
max_depth: Maximum depth for hierarchical decomposition
112+
respect_gitignore: Respect .gitignore patterns during analysis
111113
"""
112114
# Ensure config directory exists
113115
try:
@@ -149,7 +151,9 @@ def save(
149151
self._config.max_token_per_leaf_module = max_token_per_leaf_module
150152
if max_depth is not None:
151153
self._config.max_depth = max_depth
152-
154+
if respect_gitignore is not None:
155+
self._config.respect_gitignore = respect_gitignore
156+
153157
# Validate configuration (only if base fields are set)
154158
if self._config.base_url and self._config.main_model and self._config.cluster_model:
155159
self._config.validate()

codewiki/cli/models/config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ class Configuration:
118118
max_token_per_leaf_module: Maximum tokens per leaf module (default: 16000)
119119
max_depth: Maximum depth for hierarchical decomposition (default: 2)
120120
agent_instructions: Custom agent instructions for documentation generation
121+
respect_gitignore: Respect .gitignore patterns during analysis
121122
"""
122123
base_url: str
123124
main_model: str
@@ -129,6 +130,7 @@ class Configuration:
129130
max_token_per_leaf_module: int = 16000
130131
max_depth: int = 2
131132
agent_instructions: AgentInstructions = field(default_factory=AgentInstructions)
133+
respect_gitignore: bool = False
132134

133135
def validate(self):
134136
"""
@@ -153,6 +155,7 @@ def to_dict(self) -> dict:
153155
'max_token_per_module': self.max_token_per_module,
154156
'max_token_per_leaf_module': self.max_token_per_leaf_module,
155157
'max_depth': self.max_depth,
158+
'respect_gitignore': self.respect_gitignore,
156159
}
157160
if self.agent_instructions and not self.agent_instructions.is_empty():
158161
result['agent_instructions'] = self.agent_instructions.to_dict()
@@ -184,6 +187,7 @@ def from_dict(cls, data: dict) -> 'Configuration':
184187
max_token_per_leaf_module=data.get('max_token_per_leaf_module', 16000),
185188
max_depth=data.get('max_depth', 2),
186189
agent_instructions=agent_instructions,
190+
respect_gitignore=data.get('respect_gitignore', False),
187191
)
188192

189193
def is_complete(self) -> bool:
@@ -237,6 +241,7 @@ def to_backend_config(self, repo_path: str, output_dir: str, api_key: str, runti
237241
max_token_per_module=self.max_token_per_module,
238242
max_token_per_leaf_module=self.max_token_per_leaf_module,
239243
max_depth=self.max_depth,
240-
agent_instructions=final_instructions.to_dict() if final_instructions else None
244+
agent_instructions=final_instructions.to_dict() if final_instructions else None,
245+
respect_gitignore=self.respect_gitignore,
241246
)
242247

codewiki/cli/models/job.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class GenerationOptions:
2525
github_pages: bool = False
2626
no_cache: bool = False
2727
custom_output: Optional[str] = None
28+
respect_gitignore: Optional[bool] = None
2829

2930

3031
@dataclass

0 commit comments

Comments
 (0)