From b8439e352ed7fd2af8c30f9ef2aab42320b661a6 Mon Sep 17 00:00:00 2001
From: clark874 <86122040+clark874@users.noreply.github.com>
Date: Wed, 1 Jan 2025 20:00:06 +0800
Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=99=BA?=
 =?UTF-8?q?=E8=83=BD=E4=BB=A3=E7=A0=81=E4=BB=93=E5=BA=93=E5=88=86=E6=9E=90?=
 =?UTF-8?q?=E5=B7=A5=E5=85=B7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. 添加配置文件管理系统
2. 添加智能分析工具
3. 优化输出路径管理
---
 config.yaml       |  45 +++++++++
 config_manager.py | 142 ++++++++++++++++++++++++++
 smart_analysis.py | 248 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 435 insertions(+)
 create mode 100644 config.yaml
 create mode 100644 config_manager.py
 create mode 100644 smart_analysis.py

diff --git a/config.yaml b/config.yaml
new file mode 100644
index 00000000..ff70283f
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,45 @@
+# 输入配置
+input:
+  supported_sources:
+    - local
+    - github
+    - gitlab
+  default_source: local
+
+# 路径配置
+paths:
+  input:
+    base_dir: .  # 默认使用当前目录
+    github: ""   # 默认GitHub仓库
+    gitlab: ""   # 默认GitLab仓库
+  output:
+    base_dir: output  # 输出基础目录
+    reports: reports  # 报告目录
+    trees: trees     # 目录树文件
+    temp: temp       # 临时文件
+
+# 树形结构配置
+tree:
+  max_depth: 4  # 目录树最大深度
+
+# 文件配置
+file:
+  max_size: 10485760  # 10MB
+  encoding: utf-8     # 文件编码
+
+# 输出配置
+output:
+  formats:
+    - md
+    - json
+    - txt
+  default_format: md
+  files:
+    md: analysis_report.md
+    json: analysis_report.json
+    txt: analysis_report.txt
+    tree: directory_tree.txt
+
+# 内容配置
+content:
+  preview_length: 1000  # 内容预览长度（字符数）
diff --git a/config_manager.py b/config_manager.py
new file mode 100644
index 00000000..b563ce39
--- /dev/null
+++ b/config_manager.py
@@ -0,0 +1,142 @@
+import os
+import yaml
+from typing import Dict, List, Optional
+
+class ConfigManager:
+    """配置管理器：负责加载和管理配置"""
+    
+    def __init__(self, config_file: str = "config.yaml"):
+        self.config_file = config_file
+        self.config = self._load_config()
+        self._init_directories()
+    
+    def _load_config(self) -> Dict:
+        """加载配置文件"""
+        try:
+            with open(self.config_file, 'r', encoding='utf-8') as f:
+                return yaml.safe_load(f)
+        except Exception as e:
+            print(f"加载配置文件失败: {str(e)}")
+            return {}
+    
+    def _init_directories(self):
+        """初始化所有必要的目录"""
+        # 创建输出基础目录
+        os.makedirs(self.output_base_dir, exist_ok=True)
+        # 创建报告目录
+        os.makedirs(self.reports_dir, exist_ok=True)
+        # 创建树文件目录
+        os.makedirs(self.trees_dir, exist_ok=True)
+        # 创建临时文件目录
+        os.makedirs(self.temp_dir, exist_ok=True)
+    
+    @property
+    def tree_max_depth(self) -> int:
+        """获取目录树最大深度"""
+        return self.config.get('tree', {}).get('max_depth', 4)
+    
+    @property
+    def max_file_size(self) -> int:
+        """获取最大文件大小"""
+        return self.config.get('file', {}).get('max_size', 10 * 1024 * 1024)
+    
+    @property
+    def file_encoding(self) -> str:
+        """获取文件编码"""
+        return self.config.get('file', {}).get('encoding', 'utf-8')
+    
+    # 输入路径相关
+    @property
+    def input_base_dir(self) -> str:
+        """获取输入基础目录"""
+        return self.config.get('paths', {}).get('input', {}).get('base_dir', os.getcwd())
+    
+    @property
+    def github_repo(self) -> str:
+        """获取默认GitHub仓库地址"""
+        return self.config.get('paths', {}).get('input', {}).get('github', '')
+    
+    @property
+    def gitlab_repo(self) -> str:
+        """获取默认GitLab仓库地址"""
+        return self.config.get('paths', {}).get('input', {}).get('gitlab', '')
+    
+    # 输出路径相关
+    @property
+    def output_base_dir(self) -> str:
+        """获取输出基础目录"""
+        return self.config.get('paths', {}).get('output', {}).get('base_dir', 'output')
+    
+    @property
+    def reports_dir(self) -> str:
+        """获取报告目录"""
+        reports = self.config.get('paths', {}).get('output', {}).get('reports', 'reports')
+        return os.path.join(self.output_base_dir, reports)
+    
+    @property
+    def trees_dir(self) -> str:
+        """获取树文件目录"""
+        trees = self.config.get('paths', {}).get('output', {}).get('trees', 'trees')
+        return os.path.join(self.output_base_dir, trees)
+    
+    @property
+    def temp_dir(self) -> str:
+        """获取临时文件目录"""
+        temp = self.config.get('paths', {}).get('output', {}).get('temp', 'temp')
+        return os.path.join(self.output_base_dir, temp)
+    
+    @property
+    def supported_formats(self) -> List[str]:
+        """获取支持的输出格式"""
+        return self.config.get('output', {}).get('formats', ['md'])
+    
+    @property
+    def default_format(self) -> str:
+        """获取默认输出格式"""
+        return self.config.get('output', {}).get('default_format', 'md')
+    
+    def get_output_file(self, format_type: str) -> str:
+        """获取指定格式的输出文件名"""
+        files = self.config.get('output', {}).get('files', {})
+        return files.get(format_type, f'analysis_result.{format_type}')
+    
+    def get_output_path(self, filename: str, output_type: str = 'reports') -> str:
+        """
+        获取输出文件的完整路径
+        :param filename: 文件名
+        :param output_type: 输出类型（reports/trees/temp）
+        :return: 完整路径
+        """
+        if output_type == 'reports':
+            base_dir = self.reports_dir
+        elif output_type == 'trees':
+            base_dir = self.trees_dir
+        elif output_type == 'temp':
+            base_dir = self.temp_dir
+        else:
+            base_dir = self.output_base_dir
+            
+        return os.path.join(base_dir, filename)
+    
+    @property
+    def content_preview_length(self) -> int:
+        """获取内容预览长度"""
+        return self.config.get('content', {}).get('preview_length', 1000)
+    
+    @property
+    def supported_sources(self) -> List[str]:
+        """获取支持的输入源类型"""
+        return self.config.get('input', {}).get('supported_sources', ['local'])
+    
+    @property
+    def default_source(self) -> str:
+        """获取默认输入源类型"""
+        return self.config.get('input', {}).get('default_source', 'local')
+    
+    def validate_format(self, format_type: str) -> bool:
+        """验证输出格式是否支持"""
+        return format_type in self.supported_formats
+    
+    def validate_source(self, source_type: str) -> bool:
+        """验证输入源类型是否支持"""
+        return source_type in self.supported_sources
diff --git a/smart_analysis.py b/smart_analysis.py
new file mode 100644
index 00000000..9683a9fb
--- /dev/null
+++ b/smart_analysis.py
@@ -0,0 +1,248 @@
+import os
+import subprocess
+import json
+import argparse
+from typing import List, Tuple, Dict
+from gitingest.ingest import ingest
+from config_manager import ConfigManager
+
+# 加载配置
+config = ConfigManager()
+
+def generate_tree(directory: str, max_depth: int = None) -> str:
+    """
+    生成目录树结构
+    :param directory: 要分析的目录路径
+    :param max_depth: 树的最大深度
+    :return: 目录树的字符串表示
+    """
+    if max_depth is None:
+        max_depth = config.tree_max_depth
+        
+    try:
+        result = subprocess.run(
+            ['tree', '-L', str(max_depth)],
+            cwd=directory,
+            capture_output=True,
+            text=True
+        )
+        return result.stdout
+    except Exception as e:
+        return f"生成目录树时出错: {str(e)}"
+
+def analyze_tree_and_suggest_patterns(tree_output: str) -> Tuple[List[str], List[str]]:
+    """
+    分析目录树并建议包含和排除模式
+    :param tree_output: 目录树字符串
+    :return: (包含模式列表, 排除模式列表)
+    """
+    # 针对markdown文档的包含模式
+    include_patterns = [
+        # 核心源代码
+        "**/*.py",           # Python源代码
+        
+        # 文档和配置
+        "README.md",         # 主要文档
+        "CHANGELOG.md",      # 变更日志
+        "LICENSE",           # 许可证
+        "requirements.txt",  # Python依赖
+        "pyproject.toml",    # Python项目配置
+        "setup.py",         # 安装配置
+        "setup.cfg",        # 安装配置
+        "MANIFEST.in",      # 打包配置
+        
+        # 核心文档（选择性包含）
+        "docs/**/*.md",     # 文档目录下的markdown文件
+    ]
+    
+    # 排除模式 - 更细致的控制
+    exclude_patterns = [
+        # 二进制和生成文件
+        "**/*.pyc",          # Python编译文件
+        "**/__pycache__/**", # Python缓存
+        "**/*.so",           # 编译的扩展模块
+        "**/*.pyd",          # Windows下的Python扩展模块
+        "**/*.dll",          # Windows动态链接库
+        "**/*.dylib",        # Mac动态链接库
+        "**/*.egg",          # Python打包文件
+        "**/*.whl",          # Python wheel包
+        "**/*.exe",          # 可执行文件
+        
+        # 媒体文件
+        "**/*.png",          # PNG图片
+        "**/*.jpg",          # JPG图片
+        "**/*.jpeg",         # JPEG图片
+        "**/*.gif",          # GIF图片
+        "**/*.ico",          # 图标文件
+        "**/*.svg",          # SVG图片
+        "**/*.mp4",          # 视频文件
+        "**/*.mov",          # 视频文件
+        "**/*.avi",          # 视频文件
+        "**/*.mp3",          # 音频文件
+        "**/*.wav",          # 音频文件
+        
+        # 开发工具和临时文件
+        "**/.git/**",        # Git目录
+        "**/.idea/**",       # PyCharm配置
+        "**/.vscode/**",     # VSCode配置
+        "**/.env",           # 环境变量
+        "**/.env.*",         # 环境变量文件
+        "**/node_modules/**", # Node.js模块
+        "**/venv/**",        # Python虚拟环境
+        "**/env/**",         # Python虚拟环境
+        "**/build/**",       # 构建目录
+        "**/dist/**",        # 分发目录
+        "**/.pytest_cache/**", # Pytest缓存
+        "**/.coverage",      # 测试覆盖率文件
+        "**/htmlcov/**",     # 测试覆盖率报告
+        
+        # 编译和打包相关
+        "**/*.min.js",       # 压缩的JS文件
+        "**/*.min.css",      # 压缩的CSS文件
+        "**/*.map",          # Source map文件
+        "**/webpack.stats.json", # Webpack统计文件
+        
+        # UI构建文件
+        "**/ui/**/*.js",     # UI JavaScript文件
+        "**/ui/**/*.css",    # UI样式文件
+        "**/ui/build/**",    # UI构建输出
+        "**/ui/dist/**",     # UI分发文件
+        
+        # 测试文件（可选，取决于是否需要包含测试文档）
+        "**/tests/**",       # 测试目录
+        "**/test_*.py",      # 测试文件
+        "**/*_test.py",      # 测试文件
+        
+        # Jupyter notebooks（可选）
+        "**/*.ipynb",        # Jupyter笔记本
+        "**/.ipynb_checkpoints/**", # Jupyter检查点
+    ]
+    
+    return include_patterns, exclude_patterns
+
+def smart_ingest(directory: str, max_file_size: int = None, output_format: str = None) -> Dict:
+    """
+    智能分析目录并生成报告
+    :param directory: 要分析的目录路径
+    :param max_file_size: 最大文件大小限制
+    :param output_format: 输出格式（md/json/txt）
+    :return: 分析报告字典
+    """
+    if max_file_size is None:
+        max_file_size = config.max_file_size
+    if output_format is None:
+        output_format = config.default_format
+        
+    if not config.validate_format(output_format):
+        raise ValueError(f"不支持的输出格式: {output_format}")
+    
+    # 步骤1：生成目录树
+    print("步骤1: 生成目录树...")
+    tree_output = generate_tree(directory)
+    print(tree_output)
+    
+    # 保存目录树
+    tree_file = config.get_output_path(config.get_output_file('tree'), 'trees')
+    with open(tree_file, 'w', encoding=config.file_encoding) as f:
+        f.write(tree_output)
+    
+    # 步骤2：分析树结构并建议过滤模式
+    print("\n步骤2: 分析目录结构并生成建议...")
+    include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output)
+    
+    print("建议的包含模式:")
+    for pattern in include_patterns:
+        print(f"  - {pattern}")
+    
+    print("\n建议的排除模式:")
+    for pattern in exclude_patterns:
+        print(f"  - {pattern}")
+    
+    # 步骤3：执行ingest
+    print("\n步骤3: 执行文件分析...")
+    try:
+        summary, tree, content = ingest(
+            source=directory,
+            max_file_size=max_file_size,
+            include_patterns=include_patterns,
+            exclude_patterns=exclude_patterns,
+            output=config.get_output_path(config.get_output_file(output_format), 'reports')
+        )
+        
+        # 返回完整报告
+        report = {
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns
+            },
+            "analysis_result": {
+                "summary": summary,
+                "tree": tree,
+                "content": content[:config.content_preview_length] + "..." 
+                          if len(content) > config.content_preview_length else content
+            }
+        }
+        
+        # 保存JSON报告
+        json_file = config.get_output_path(config.get_output_file('json'), 'reports')
+        with open(json_file, "w", encoding=config.file_encoding) as f:
+            json.dump(report, f, ensure_ascii=False, indent=2)
+            
+        return report
+        
+    except Exception as e:
+        return {
+            "error": f"分析过程中出错: {str(e)}",
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns
+            }
+        }
+
+if __name__ == "__main__":
+    # 配置命令行参数
+    parser = argparse.ArgumentParser(description="智能代码仓库分析工具")
+    parser.add_argument("--source", "-s", type=str, 
+                      default=config.input_base_dir,
+                      help="要分析的源目录路径")
+    parser.add_argument("--source-type", "-t", type=str,
+                      choices=config.supported_sources,
+                      default=config.default_source,
+                      help="输入源类型")
+    parser.add_argument("--max-depth", "-d", type=int, 
+                      default=config.tree_max_depth,
+                      help="目录树最大深度")
+    parser.add_argument("--max-size", "-m", type=int, 
+                      default=config.max_file_size,
+                      help="最大文件大小(bytes)")
+    parser.add_argument("--output-dir", "-o", type=str, 
+                      default=config.output_base_dir,
+                      help="输出基础目录")
+    parser.add_argument("--format", "-f", type=str,
+                      choices=config.supported_formats,
+                      default=config.default_format,
+                      help="输出格式")
+    
+    args = parser.parse_args()
+    
+    print(f"开始分析目录: {args.source}")
+    print(f"配置信息:")
+    print(f"- 输入源类型: {args.source_type}")
+    print(f"- 目录树深度: {args.max_depth}")
+    print(f"- 最大文件大小: {args.max_size / 1024 / 1024:.2f}MB")
+    print(f"- 输出基础目录: {args.output_dir}")
+    print(f"- 输出格式: {args.format}")
+    
+    # 执行分析
+    result = smart_ingest(
+        directory=args.source,
+        max_file_size=args.max_size,
+        output_format=args.format
+    )
+    
+    print(f"\n分析完成！输出文件：")
+    print(f"1. 目录树: {config.get_output_path(config.get_output_file('tree'), 'trees')}")
+    print(f"2. 分析报告: {config.get_output_path(config.get_output_file(args.format), 'reports')}")
+    print(f"3. JSON报告: {config.get_output_path(config.get_output_file('json'), 'reports')}")

From 9eed1e5d529185b0b7edb24c5b7ef03f5094d1cd Mon Sep 17 00:00:00 2001
From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com>
Date: Wed, 1 Jan 2025 20:01:15 +0100
Subject: [PATCH 2/2] Translate clark874's changes to English, add
 docstrings, and apply fixes

---
 config.yaml           |  32 ++--
 config_manager.py     | 142 ----------------
 smart_analysis.py     | 386 ++++++++++++++++++++++--------------------
 src/config_manager.py | 312 ++++++++++++++++++++++++++++++++++
 4 files changed, 529 insertions(+), 343 deletions(-)
 delete mode 100644 config_manager.py
 create mode 100644 src/config_manager.py

diff --git a/config.yaml b/config.yaml
index ff70283f..f362f782 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,4 +1,4 @@
-# 输入配置
+# Input configuration
 input:
   supported_sources:
     - local
@@ -6,28 +6,28 @@ input:
     - gitlab
   default_source: local
 
-# 路径配置
+# Path configuration
 paths:
   input:
-    base_dir: .  # 默认使用当前目录
-    github: ""   # 默认GitHub仓库
-    gitlab: ""   # 默认GitLab仓库
+    base_dir: .  # Use the current directory by default
+    github: ""   # Default GitHub repository
+    gitlab: ""   # Default GitLab repository
   output:
-    base_dir: output  # 输出基础目录
-    reports: reports  # 报告目录
-    trees: trees     # 目录树文件
-    temp: temp       # 临时文件
+    base_dir: output  # Output base directory
+    reports: reports  # Report directory
+    trees: trees      # Directory tree files
+    temp: temp        # Temporary files
 
-# 树形结构配置
+# Tree structure configuration
 tree:
-  max_depth: 4  # 目录树最大深度
+  max_depth: 4 # Maximum depth of directory tree
 
-# 文件配置
+# File configuration
 file:
   max_size: 10485760  # 10MB
-  encoding: utf-8     # 文件编码
+  encoding: utf-8     # File encoding
 
-# 输出配置
+# Output configuration
 output:
   formats:
     - md
@@ -40,6 +40,6 @@ output:
     txt: analysis_report.txt
     tree: directory_tree.txt
 
-# 内容配置
+# Content configuration
 content:
-  preview_length: 1000  # 内容预览长度（字符数）
+  preview_length: 1000 # Content preview length (number of characters)
diff --git a/config_manager.py b/config_manager.py
deleted file mode 100644
index b563ce39..00000000
--- a/config_manager.py
+++ /dev/null
@@ -1,142 +0,0 @@
-import os
-import yaml
-from typing import Dict, List, Optional
-
-class ConfigManager:
-    """配置管理器：负责加载和管理配置"""
-    
-    def __init__(self, config_file: str = "config.yaml"):
-        self.config_file = config_file
-        self.config = self._load_config()
-        self._init_directories()
-    
-    def _load_config(self) -> Dict:
-        """加载配置文件"""
-        try:
-            with open(self.config_file, 'r', encoding='utf-8') as f:
-                return yaml.safe_load(f)
-        except Exception as e:
-            print(f"加载配置文件失败: {str(e)}")
-            return {}
-    
-    def _init_directories(self):
-        """初始化所有必要的目录"""
-        # 创建输出基础目录
-        os.makedirs(self.output_base_dir, exist_ok=True)
-        # 创建报告目录
-        os.makedirs(self.reports_dir, exist_ok=True)
-        # 创建树文件目录
-        os.makedirs(self.trees_dir, exist_ok=True)
-        # 创建临时文件目录
-        os.makedirs(self.temp_dir, exist_ok=True)
-    
-    @property
-    def tree_max_depth(self) -> int:
-        """获取目录树最大深度"""
-        return self.config.get('tree', {}).get('max_depth', 4)
-    
-    @property
-    def max_file_size(self) -> int:
-        """获取最大文件大小"""
-        return self.config.get('file', {}).get('max_size', 10 * 1024 * 1024)
-    
-    @property
-    def file_encoding(self) -> str:
-        """获取文件编码"""
-        return self.config.get('file', {}).get('encoding', 'utf-8')
-    
-    # 输入路径相关
-    @property
-    def input_base_dir(self) -> str:
-        """获取输入基础目录"""
-        return self.config.get('paths', {}).get('input', {}).get('base_dir', os.getcwd())
-    
-    @property
-    def github_repo(self) -> str:
-        """获取默认GitHub仓库地址"""
-        return self.config.get('paths', {}).get('input', {}).get('github', '')
-    
-    @property
-    def gitlab_repo(self) -> str:
-        """获取默认GitLab仓库地址"""
-        return self.config.get('paths', {}).get('input', {}).get('gitlab', '')
-    
-    # 输出路径相关
-    @property
-    def output_base_dir(self) -> str:
-        """获取输出基础目录"""
-        return self.config.get('paths', {}).get('output', {}).get('base_dir', 'output')
-    
-    @property
-    def reports_dir(self) -> str:
-        """获取报告目录"""
-        reports = self.config.get('paths', {}).get('output', {}).get('reports', 'reports')
-        return os.path.join(self.output_base_dir, reports)
-    
-    @property
-    def trees_dir(self) -> str:
-        """获取树文件目录"""
-        trees = self.config.get('paths', {}).get('output', {}).get('trees', 'trees')
-        return os.path.join(self.output_base_dir, trees)
-    
-    @property
-    def temp_dir(self) -> str:
-        """获取临时文件目录"""
-        temp = self.config.get('paths', {}).get('output', {}).get('temp', 'temp')
-        return os.path.join(self.output_base_dir, temp)
-    
-    @property
-    def supported_formats(self) -> List[str]:
-        """获取支持的输出格式"""
-        return self.config.get('output', {}).get('formats', ['md'])
-    
-    @property
-    def default_format(self) -> str:
-        """获取默认输出格式"""
-        return self.config.get('output', {}).get('default_format', 'md')
-    
-    def get_output_file(self, format_type: str) -> str:
-        """获取指定格式的输出文件名"""
-        files = self.config.get('output', {}).get('files', {})
-        return files.get(format_type, f'analysis_result.{format_type}')
-    
-    def get_output_path(self, filename: str, output_type: str = 'reports') -> str:
-        """
-        获取输出文件的完整路径
-        :param filename: 文件名
-        :param output_type: 输出类型（reports/trees/temp）
-        :return: 完整路径
-        """
-        if output_type == 'reports':
-            base_dir = self.reports_dir
-        elif output_type == 'trees':
-            base_dir = self.trees_dir
-        elif output_type == 'temp':
-            base_dir = self.temp_dir
-        else:
-            base_dir = self.output_base_dir
-            
-        return os.path.join(base_dir, filename)
-    
-    @property
-    def content_preview_length(self) -> int:
-        """获取内容预览长度"""
-        return self.config.get('content', {}).get('preview_length', 1000)
-    
-    @property
-    def supported_sources(self) -> List[str]:
-        """获取支持的输入源类型"""
-        return self.config.get('input', {}).get('supported_sources', ['local'])
-    
-    @property
-    def default_source(self) -> str:
-        """获取默认输入源类型"""
-        return self.config.get('input', {}).get('default_source', 'local')
-    
-    def validate_format(self, format_type: str) -> bool:
-        """验证输出格式是否支持"""
-        return format_type in self.supported_formats
-    
-    def validate_source(self, source_type: str) -> bool:
-        """验证输入源类型是否支持"""
-        return source_type in self.supported_sources
diff --git a/smart_analysis.py b/smart_analysis.py
index 9683a9fb..61479973 100644
--- a/smart_analysis.py
+++ b/smart_analysis.py
@@ -1,248 +1,264 @@
-import os
-import subprocess
-import json
 import argparse
-from typing import List, Tuple, Dict
-from gitingest.ingest import ingest
+import json
+import subprocess
+from typing import Any
+
 from config_manager import ConfigManager
+from gitingest.ingest import ingest
 
-# 加载配置
+# Load configuration
 config = ConfigManager()
 
-def generate_tree(directory: str, max_depth: int = None) -> str:
+
+def generate_tree(directory: str, max_depth: int | None = None) -> str:
     """
-    生成目录树结构
-    :param directory: 要分析的目录路径
-    :param max_depth: 树的最大深度
-    :return: 目录树的字符串表示
+    Generate a directory tree structure.
+
+    Parameters
+    ----------
+    directory : str
+        Directory path to analyze.
+    max_depth : int | None
+        Maximum depth of the tree, by default None.
+
+    Returns
+    -------
+    str
+        String representation of the directory tree.
     """
     if max_depth is None:
         max_depth = config.tree_max_depth
-        
+
     try:
-        result = subprocess.run(
-            ['tree', '-L', str(max_depth)],
-            cwd=directory,
-            capture_output=True,
-            text=True
-        )
+        result = subprocess.run(["tree", "-L", str(max_depth)], cwd=directory, capture_output=True, text=True)
         return result.stdout
     except Exception as e:
-        return f"生成目录树时出错: {str(e)}"
+        return f"Failed to generate tree: {e}"
 
-def analyze_tree_and_suggest_patterns(tree_output: str) -> Tuple[List[str], List[str]]:
+
+def analyze_tree_and_suggest_patterns(tree_output: str) -> tuple[list[str], list[str]]:
     """
-    分析目录树并建议包含和排除模式
-    :param tree_output: 目录树字符串
-    :return: (包含模式列表, 排除模式列表)
+    Analyze the directory tree and suggest include/exclude patterns.
+
+    Parameters
+    ----------
+    tree_output : str
+        String representation of the directory tree.
+
+    Returns
+    -------
+    tuple[list[str], list[str]]
+        List of include patterns and list of exclude patterns.
     """
-    # 针对markdown文档的包含模式
+    # Include patterns: Python sources, top-level project metadata, and docs markdown
     include_patterns = [
-        # 核心源代码
-        "**/*.py",           # Python源代码
-        
-        # 文档和配置
-        "README.md",         # 主要文档
-        "CHANGELOG.md",      # 变更日志
-        "LICENSE",           # 许可证
-        "requirements.txt",  # Python依赖
-        "pyproject.toml",    # Python项目配置
-        "setup.py",         # 安装配置
-        "setup.cfg",        # 安装配置
-        "MANIFEST.in",      # 打包配置
-        
-        # 核心文档（选择性包含）
-        "docs/**/*.md",     # 文档目录下的markdown文件
+        "**/*.py",
+        "README.md",
+        "CHANGELOG.md",
+        "LICENSE",
+        "requirements.txt",
+        "pyproject.toml",
+        "setup.py",
+        "setup.cfg",
+        "MANIFEST.in",
+        "docs/**/*.md",
     ]
-    
-    # 排除模式 - 更细致的控制
+
     exclude_patterns = [
-        # 二进制和生成文件
-        "**/*.pyc",          # Python编译文件
-        "**/__pycache__/**", # Python缓存
-        "**/*.so",           # 编译的扩展模块
-        "**/*.pyd",          # Windows下的Python扩展模块
-        "**/*.dll",          # Windows动态链接库
-        "**/*.dylib",        # Mac动态链接库
-        "**/*.egg",          # Python打包文件
-        "**/*.whl",          # Python wheel包
-        "**/*.exe",          # 可执行文件
-        
-        # 媒体文件
-        "**/*.png",          # PNG图片
-        "**/*.jpg",          # JPG图片
-        "**/*.jpeg",         # JPEG图片
-        "**/*.gif",          # GIF图片
-        "**/*.ico",          # 图标文件
-        "**/*.svg",          # SVG图片
-        "**/*.mp4",          # 视频文件
-        "**/*.mov",          # 视频文件
-        "**/*.avi",          # 视频文件
-        "**/*.mp3",          # 音频文件
-        "**/*.wav",          # 音频文件
-        
-        # 开发工具和临时文件
-        "**/.git/**",        # Git目录
-        "**/.idea/**",       # PyCharm配置
-        "**/.vscode/**",     # VSCode配置
-        "**/.env",           # 环境变量
-        "**/.env.*",         # 环境变量文件
-        "**/node_modules/**", # Node.js模块
-        "**/venv/**",        # Python虚拟环境
-        "**/env/**",         # Python虚拟环境
-        "**/build/**",       # 构建目录
-        "**/dist/**",        # 分发目录
-        "**/.pytest_cache/**", # Pytest缓存
-        "**/.coverage",      # 测试覆盖率文件
-        "**/htmlcov/**",     # 测试覆盖率报告
-        
-        # 编译和打包相关
-        "**/*.min.js",       # 压缩的JS文件
-        "**/*.min.css",      # 压缩的CSS文件
-        "**/*.map",          # Source map文件
-        "**/webpack.stats.json", # Webpack统计文件
-        
-        # UI构建文件
-        "**/ui/**/*.js",     # UI JavaScript文件
-        "**/ui/**/*.css",    # UI样式文件
-        "**/ui/build/**",    # UI构建输出
-        "**/ui/dist/**",     # UI分发文件
-        
-        # 测试文件（可选，取决于是否需要包含测试文档）
-        "**/tests/**",       # 测试目录
-        "**/test_*.py",      # 测试文件
-        "**/*_test.py",      # 测试文件
-        
-        # Jupyter notebooks（可选）
-        "**/*.ipynb",        # Jupyter笔记本
-        "**/.ipynb_checkpoints/**", # Jupyter检查点
+        "**/*.pyc",
+        "**/__pycache__/**",
+        "**/*.so", "**/*.pyd",
+        "**/*.dll",
+        "**/*.dylib",
+        "**/*.egg",
+        "**/*.whl",
+        "**/*.exe",
+        "**/*.png",
+        "**/*.jpg",
+        "**/*.jpeg",
+        "**/*.gif",
+        "**/*.ico",
+        "**/*.svg",
+        "**/*.mp4",
+        "**/*.mov",
+        "**/*.avi",
+        "**/*.mp3",
+        "**/*.wav",
+        "**/.git/**",
+        "**/.idea/**",
+        "**/.vscode/**",
+        "**/.env",
+        "**/.env.*",
+        "**/node_modules/**",
+        "**/venv/**",
+        "**/env/**",
+        "**/build/**",
+        "**/dist/**",
+        "**/.pytest_cache/**",
+        "**/.coverage",
+        "**/htmlcov/**",
+        "**/*.min.js",
+        "**/*.min.css",
+        "**/*.map",
+        "**/webpack.stats.json",
+        "**/ui/**/*.js",
+        "**/ui/**/*.css",
+        "**/ui/build/**",
+        "**/ui/dist/**",
+        "**/tests/**",
+        "**/test_*.py",
+        "**/*_test.py",
+        "**/*.ipynb",
+        "**/.ipynb_checkpoints/**",
     ]
-    
+
     return include_patterns, exclude_patterns
 
-def smart_ingest(directory: str, max_file_size: int = None, output_format: str = None) -> Dict:
+
+def smart_ingest(directory: str, max_file_size: int | None = None, output_format: str | None = None) -> dict[str, Any]:
     """
-    智能分析目录并生成报告
-    :param directory: 要分析的目录路径
-    :param max_file_size: 最大文件大小限制
-    :param output_format: 输出格式（md/json/txt）
-    :return: 分析报告字典
+    Perform smart ingest analysis on the given directory and return the report.
+
+    Parameters
+    ----------
+    directory : str
+        Directory path to analyze.
+    max_file_size : int | None
+        Maximum file size to analyze; None falls back to the configured value.
+    output_format : str | None
+        Output format of the report; None falls back to the configured default.
+
+    Returns
+    -------
+    dict[str, Any]
+        Report of the analysis.
+
+    Raises
+    ------
+    ValueError
+        If the output format is not supported.
     """
     if max_file_size is None:
         max_file_size = config.max_file_size
     if output_format is None:
         output_format = config.default_format
-        
+
     if not config.validate_format(output_format):
-        raise ValueError(f"不支持的输出格式: {output_format}")
-    
-    # 步骤1：生成目录树
-    print("步骤1: 生成目录树...")
+        raise ValueError(f"Unsupported output format: {output_format}")
+
+    # Step 1: Generate directory tree
+    print("Step 1: Generating directory tree...", end="\n\n")
     tree_output = generate_tree(directory)
     print(tree_output)
-    
-    # 保存目录树
-    tree_file = config.get_output_path(config.get_output_file('tree'), 'trees')
-    with open(tree_file, 'w', encoding=config.file_encoding) as f:
+
+    # Save the tree output to a file
+    tree_file = config.get_output_path(config.get_output_file("tree"), "trees")
+    with open(tree_file, "w", encoding=config.file_encoding) as f:
         f.write(tree_output)
-    
-    # 步骤2：分析树结构并建议过滤模式
-    print("\n步骤2: 分析目录结构并生成建议...")
+
+    # Step 2: Analyze the directory structure and suggest filter patterns
+    print("Step 2: Analyzing directory structure and suggesting filter patterns...")
     include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output)
-    
-    print("建议的包含模式:")
+
+    print("Suggested include patterns:", end="\n\n")
     for pattern in include_patterns:
         print(f"  - {pattern}")
-    
-    print("\n建议的排除模式:")
+
+    print("Suggested exclude patterns:", end="\n\n")
     for pattern in exclude_patterns:
         print(f"  - {pattern}")
-    
-    # 步骤3：执行ingest
-    print("\n步骤3: 执行文件分析...")
+
+    # Step 3: Execute file analysis
+    print("\nStep 3: Executing file analysis...")
     try:
         summary, tree, content = ingest(
             source=directory,
             max_file_size=max_file_size,
             include_patterns=include_patterns,
             exclude_patterns=exclude_patterns,
-            output=config.get_output_path(config.get_output_file(output_format), 'reports')
+            output=config.get_output_path(config.get_output_file(output_format), "reports"),
         )
-        
-        # 返回完整报告
+
+        # Report
         report = {
             "directory_tree": tree_output,
             "suggested_patterns": {
                 "include": include_patterns,
-                "exclude": exclude_patterns
+                "exclude": exclude_patterns,
             },
             "analysis_result": {
                 "summary": summary,
                 "tree": tree,
-                "content": content[:config.content_preview_length] + "..." 
-                          if len(content) > config.content_preview_length else content
-            }
+                "content": (
+                    content[: config.content_preview_length] + "..."
+                    if len(content) > config.content_preview_length
+                    else content
+                ),
+            },
         }
-        
-        # 保存JSON报告
-        json_file = config.get_output_path(config.get_output_file('json'), 'reports')
+
+        # Save the report to a JSON file
+        json_file = config.get_output_path(config.get_output_file("json"), "reports")
         with open(json_file, "w", encoding=config.file_encoding) as f:
             json.dump(report, f, ensure_ascii=False, indent=2)
-            
+
         return report
-        
+
     except Exception as e:
         return {
-            "error": f"分析过程中出错: {str(e)}",
+            "error": f"分析过程中出错: {e}",
             "directory_tree": tree_output,
             "suggested_patterns": {
                 "include": include_patterns,
-                "exclude": exclude_patterns
-            }
+                "exclude": exclude_patterns,
+            },
         }
 
-if __name__ == "__main__":
-    # 配置命令行参数
-    parser = argparse.ArgumentParser(description="智能代码仓库分析工具")
-    parser.add_argument("--source", "-s", type=str, 
-                      default=config.input_base_dir,
-                      help="要分析的源目录路径")
-    parser.add_argument("--source-type", "-t", type=str,
-                      choices=config.supported_sources,
-                      default=config.default_source,
-                      help="输入源类型")
-    parser.add_argument("--max-depth", "-d", type=int, 
-                      default=config.tree_max_depth,
-                      help="目录树最大深度")
-    parser.add_argument("--max-size", "-m", type=int, 
-                      default=config.max_file_size,
-                      help="最大文件大小(bytes)")
-    parser.add_argument("--output-dir", "-o", type=str, 
-                      default=config.output_base_dir,
-                      help="输出基础目录")
-    parser.add_argument("--format", "-f", type=str,
-                      choices=config.supported_formats,
-                      default=config.default_format,
-                      help="输出格式")
-    
-    args = parser.parse_args()
-    
-    print(f"开始分析目录: {args.source}")
-    print(f"配置信息:")
-    print(f"- 输入源类型: {args.source_type}")
-    print(f"- 目录树深度: {args.max_depth}")
-    print(f"- 最大文件大小: {args.max_size / 1024 / 1024:.2f}MB")
-    print(f"- 输出基础目录: {args.output_dir}")
-    print(f"- 输出格式: {args.format}")
-    
-    # 执行分析
-    result = smart_ingest(
-        directory=args.source,
-        max_file_size=args.max_size,
-        output_format=args.format
+
+def main() -> None:
+    """Parse the command-line options, run the analysis and report where the outputs were written."""
+    parser = argparse.ArgumentParser(description="Intelligent directory analysis tool")
+    parser.add_argument("--source", "-s", type=str, default=config.input_base_dir, help="Directory to analyze")
+    parser.add_argument(
+        "--source-type",
+        "-t",
+        type=str,
+        choices=config.supported_sources,
+        default=config.default_source,
+        help="Input source type",
+    )
+    parser.add_argument("--max-depth", "-d", type=int, default=config.tree_max_depth, help="Maximum depth of the tree")
+    parser.add_argument(
+        "--max-size", "-m", type=int, default=config.max_file_size, help="Maximum file size to analyze"
+    )
+    parser.add_argument("--output-dir", "-o", type=str, default=config.output_base_dir, help="Output base directory")
+    parser.add_argument(
+        "--format",
+        "-f",
+        type=str,
+        choices=config.supported_formats,
+        default=config.default_format,
+        help="Output format of the report",
     )
-    
-    print(f"\n分析完成！输出文件：")
-    print(f"1. 目录树: {config.get_output_path(config.get_output_file('tree'), 'trees')}")
-    print(f"2. 分析报告: {config.get_output_path(config.get_output_file(args.format), 'reports')}")
-    print(f"3. JSON报告: {config.get_output_path(config.get_output_file('json'), 'reports')}")
+
+    args = parser.parse_args()
+    print(f"Start analyzing directory: {args.source}")
+    print("Configuration information:")
+    print(f"- Input source type: {args.source_type}")
+    print(f"- Directory tree depth: {args.max_depth}")
+    print(f"- Maximum file size: {args.max_size / 1024 / 1024:.2f}MB")
+    print(f"- Output base directory: {args.output_dir}")
+    print(f"- Output format: {args.format}")
+    # NOTE(review): --source-type, --max-depth and --output-dir are only echoed above, not passed on.
+    result = smart_ingest(directory=args.source, max_file_size=args.max_size, output_format=args.format)
+    if "error" in result:
+        # smart_ingest reports failures through an "error" entry rather than raising
+        raise SystemExit(result["error"])
+    print("\nAnalysis completed! Output file:")
+    print(f"1. Directory tree: {config.get_output_path(config.get_output_file('tree'), 'trees')}")
+    print(f"2. Analysis report: {config.get_output_path(config.get_output_file(args.format), 'reports')}")
+    print(f"3. JSON report: {config.get_output_path(config.get_output_file('json'), 'reports')}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/config_manager.py b/src/config_manager.py
new file mode 100644
index 00000000..1cf85592
--- /dev/null
+++ b/src/config_manager.py
@@ -0,0 +1,312 @@
+""" Configuration manager module for loading and managing configuration. """
+
+import os
+from typing import Any
+
+import yaml
+
+
+class ConfigManager:
+    """Configuration manager class for loading and managing configuration."""
+
+    def __init__(self, config_file: str = "config.yaml") -> None:
+        """
+        Load the configuration file and create the configured output directories.
+
+        Parameters
+        ----------
+        config_file : str, optional
+            The configuration file path, by default "config.yaml".
+        """
+        self.config_file = config_file
+        self.config = self._load_config()  # must be set before the directory properties are read
+        self._init_directories()
+
+    def _load_config(self) -> dict[str, Any]:
+        """
+        Load configuration from the config file.
+
+        Returns
+        -------
+        dict[str, Any]
+            The parsed configuration; {} if the file is empty or cannot be loaded.
+        """
+        try:
+            with open(self.config_file, encoding="utf-8") as f:
+                return yaml.safe_load(f) or {}  # safe_load gives None for an empty document
+        except Exception as e:  # best effort: fall back to the built-in defaults
+            print(f"Failed to load configuration file: {e}")
+            return {}
+
+    def _init_directories(self) -> None:
+        """
+        Create the directories that analysis output is written to.
+
+        The output base directory is created first, then the reports, trees
+        and temporary directories; ones that already exist are left as they are.
+        """
+        # Resolve each property lazily, right before its directory is created.
+        directory_properties = ("output_base_dir", "reports_dir", "trees_dir", "temp_dir")
+        for property_name in directory_properties:
+            directory = getattr(self, property_name)
+            os.makedirs(directory, exist_ok=True)
+
+    @property
+    def tree_max_depth(self) -> int:
+        """
+        Get the maximum depth of the directory tree (``tree.max_depth``).
+
+        Returns
+        -------
+        int
+            The configured maximum depth, or 4 when the setting is absent.
+        """
+        return self.config.get("tree", {}).get("max_depth", 4)
+
+    @property
+    def max_file_size(self) -> int:
+        """
+        Get the maximum file size (``file.max_size``).
+
+        Returns
+        -------
+        int
+            The configured size in bytes, or 10 * 1024 * 1024 when the setting is absent.
+        """
+        return self.config.get("file", {}).get("max_size", 10 * 1024 * 1024)
+
+    @property
+    def file_encoding(self) -> str:
+        """
+        Get the file encoding (``file.encoding``).
+
+        Returns
+        -------
+        str
+            The configured encoding name, or "utf-8" when the setting is absent.
+        """
+        return self.config.get("file", {}).get("encoding", "utf-8")
+
+    # Input path related
+    @property
+    def input_base_dir(self) -> str:
+        """
+        Get the input base directory (``paths.input.base_dir``).
+
+        Returns
+        -------
+        str
+            The configured directory, or the current working directory when the setting is absent.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("base_dir", os.getcwd())
+
+    @property
+    def github_repo(self) -> str:
+        """
+        Get the default GitHub repository address (``paths.input.github``).
+
+        Returns
+        -------
+        str
+            The configured address, or an empty string when the setting is absent.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("github", "")
+
+    @property
+    def gitlab_repo(self) -> str:
+        """
+        Get the default GitLab repository address (``paths.input.gitlab``).
+
+        Returns
+        -------
+        str
+            The configured address, or an empty string when the setting is absent.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("gitlab", "")
+
+    @property
+    def output_base_dir(self) -> str:
+        """
+        Get the output base directory (``paths.output.base_dir``).
+
+        Returns
+        -------
+        str
+            The configured directory, or "output" when the setting is absent.
+        """
+        return self.config.get("paths", {}).get("output", {}).get("base_dir", "output")
+
+    @property
+    def reports_dir(self) -> str:
+        """
+        Build the path of the directory that holds analysis reports.
+
+        Returns
+        -------
+        str
+            The output base directory joined with ``paths.output.reports`` ("reports" by default).
+        """
+        output_section = self.config.get("paths", {}).get("output", {})
+        return os.path.join(self.output_base_dir, output_section.get("reports", "reports"))
+
+    @property
+    def trees_dir(self) -> str:
+        """
+        Build the path of the directory that holds directory-tree files.
+
+        Returns
+        -------
+        str
+            The output base directory joined with ``paths.output.trees`` ("trees" by default).
+        """
+        output_section = self.config.get("paths", {}).get("output", {})
+        return os.path.join(self.output_base_dir, output_section.get("trees", "trees"))
+
+    @property
+    def temp_dir(self) -> str:
+        """
+        Build the path of the directory that holds temporary files.
+
+        Returns
+        -------
+        str
+            The output base directory joined with ``paths.output.temp`` ("temp" by default).
+        """
+        output_section = self.config.get("paths", {}).get("output", {})
+        return os.path.join(self.output_base_dir, output_section.get("temp", "temp"))
+
+    @property
+    def supported_formats(self) -> list[str]:
+        """
+        Get the supported output formats (``output.formats``).
+
+        Returns
+        -------
+        list[str]
+            The configured formats, or ["md"] when the setting is absent.
+        """
+        return self.config.get("output", {}).get("formats", ["md"])
+
+    @property
+    def default_format(self) -> str:
+        """
+        Get the default output format (``output.default_format``).
+
+        Returns
+        -------
+        str
+            The configured default format, or "md" when the setting is absent.
+        """
+        return self.config.get("output", {}).get("default_format", "md")
+
+    def get_output_file(self, format_type: str) -> str:
+        """
+        Look up the output file name configured for a format type.
+
+        Parameters
+        ----------
+        format_type : str
+            The format type, used as the key into ``output.files``.
+
+        Returns
+        -------
+        str
+            The name from ``output.files``, or ``analysis_result.<format_type>`` if none is set.
+        """
+        fallback_name = f"analysis_result.{format_type}"
+        return self.config.get("output", {}).get("files", {}).get(format_type, fallback_name)
+
+    def get_output_path(self, filename: str, output_type: str = "reports") -> str:
+        """
+        Join a file name onto the directory that belongs to an output type.
+
+        Parameters
+        ----------
+        filename : str
+            The file name to place inside the chosen directory.
+        output_type : str, optional
+            One of "reports", "trees" or "temp"; by default "reports".
+
+        Returns
+        -------
+        str
+            The directory for ``output_type`` joined with ``filename``.
+        """
+        # Known output types map to the property that holds their directory;
+        # any other value falls back to the output base directory.
+        property_by_type = {
+            "reports": "reports_dir",
+            "trees": "trees_dir",
+            "temp": "temp_dir",
+        }
+        property_name = property_by_type.get(output_type, "output_base_dir")
+        target_dir = getattr(self, property_name)
+        return os.path.join(target_dir, filename)
+
+    @property
+    def content_preview_length(self) -> int:
+        """
+        Get the content preview length (``content.preview_length``).
+
+        Returns
+        -------
+        int
+            The configured preview length, or 1000 when the setting is absent.
+        """
+        return self.config.get("content", {}).get("preview_length", 1000)
+
+    @property
+    def supported_sources(self) -> list[str]:
+        """
+        Get the supported input sources (``input.supported_sources``).
+
+        Returns
+        -------
+        list[str]
+            The configured input sources, or ["local"] when the setting is absent.
+        """
+        return self.config.get("input", {}).get("supported_sources", ["local"])
+
+    @property
+    def default_source(self) -> str:
+        """
+        Get the default input source type (``input.default_source``).
+
+        Returns
+        -------
+        str
+            The configured default source type, or "local" when the setting is absent.
+        """
+        return self.config.get("input", {}).get("default_source", "local")
+
+    def validate_format(self, format_type: str) -> bool:
+        """
+        Check whether an output format is listed in ``supported_formats``.
+
+        Parameters
+        ----------
+        format_type : str
+            The output format type to look up.
+
+        Returns
+        -------
+        bool
+            True if the format is supported, False otherwise.
+        """
+        return format_type in self.supported_formats
+
+    def validate_source(self, source_type: str) -> bool:
+        """
+        Check whether an input source is listed in ``supported_sources``.
+
+        Parameters
+        ----------
+        source_type : str
+            The input source type to look up.
+
+        Returns
+        -------
+        bool
+            True if the source is supported, False otherwise.
+        """
+        return source_type in self.supported_sources