From b8439e352ed7fd2af8c30f9ef2aab42320b661a6 Mon Sep 17 00:00:00 2001 From: clark874 <86122040+clark874@users.noreply.github.com> Date: Wed, 1 Jan 2025 20:00:06 +0800 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=99=BA?= =?UTF-8?q?=E8=83=BD=E4=BB=A3=E7=A0=81=E4=BB=93=E5=BA=93=E5=88=86=E6=9E=90?= =?UTF-8?q?=E5=B7=A5=E5=85=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 添加配置文件管理系统 2. 添加智能分析工具 3. 优化输出路径管理 --- config.yaml | 45 +++++++++ config_manager.py | 142 ++++++++++++++++++++++++++ smart_analysis.py | 248 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 435 insertions(+) create mode 100644 config.yaml create mode 100644 config_manager.py create mode 100644 smart_analysis.py diff --git a/config.yaml b/config.yaml new file mode 100644 index 00000000..ff70283f --- /dev/null +++ b/config.yaml @@ -0,0 +1,45 @@ +# 输入配置 +input: + supported_sources: + - local + - github + - gitlab + default_source: local + +# 路径配置 +paths: + input: + base_dir: . 
# 默认使用当前目录 + github: "" # 默认GitHub仓库 + gitlab: "" # 默认GitLab仓库 + output: + base_dir: output # 输出基础目录 + reports: reports # 报告目录 + trees: trees # 目录树文件 + temp: temp # 临时文件 + +# 树形结构配置 +tree: + max_depth: 4 # 目录树最大深度 + +# 文件配置 +file: + max_size: 10485760 # 10MB + encoding: utf-8 # 文件编码 + +# 输出配置 +output: + formats: + - md + - json + - txt + default_format: md + files: + md: analysis_report.md + json: analysis_report.json + txt: analysis_report.txt + tree: directory_tree.txt + +# 内容配置 +content: + preview_length: 1000 # 内容预览长度(字符数) diff --git a/config_manager.py b/config_manager.py new file mode 100644 index 00000000..b563ce39 --- /dev/null +++ b/config_manager.py @@ -0,0 +1,142 @@ +import os +import yaml +from typing import Dict, List, Optional + +class ConfigManager: + """配置管理器:负责加载和管理配置""" + + def __init__(self, config_file: str = "config.yaml"): + self.config_file = config_file + self.config = self._load_config() + self._init_directories() + + def _load_config(self) -> Dict: + """加载配置文件""" + try: + with open(self.config_file, 'r', encoding='utf-8') as f: + return yaml.safe_load(f) + except Exception as e: + print(f"加载配置文件失败: {str(e)}") + return {} + + def _init_directories(self): + """初始化所有必要的目录""" + # 创建输出基础目录 + os.makedirs(self.output_base_dir, exist_ok=True) + # 创建报告目录 + os.makedirs(self.reports_dir, exist_ok=True) + # 创建树文件目录 + os.makedirs(self.trees_dir, exist_ok=True) + # 创建临时文件目录 + os.makedirs(self.temp_dir, exist_ok=True) + + @property + def tree_max_depth(self) -> int: + """获取目录树最大深度""" + return self.config.get('tree', {}).get('max_depth', 4) + + @property + def max_file_size(self) -> int: + """获取最大文件大小""" + return self.config.get('file', {}).get('max_size', 10 * 1024 * 1024) + + @property + def file_encoding(self) -> str: + """获取文件编码""" + return self.config.get('file', {}).get('encoding', 'utf-8') + + # 输入路径相关 + @property + def input_base_dir(self) -> str: + """获取输入基础目录""" + return self.config.get('paths', {}).get('input', {}).get('base_dir', os.getcwd()) 
+ + @property + def github_repo(self) -> str: + """获取默认GitHub仓库地址""" + return self.config.get('paths', {}).get('input', {}).get('github', '') + + @property + def gitlab_repo(self) -> str: + """获取默认GitLab仓库地址""" + return self.config.get('paths', {}).get('input', {}).get('gitlab', '') + + # 输出路径相关 + @property + def output_base_dir(self) -> str: + """获取输出基础目录""" + return self.config.get('paths', {}).get('output', {}).get('base_dir', 'output') + + @property + def reports_dir(self) -> str: + """获取报告目录""" + reports = self.config.get('paths', {}).get('output', {}).get('reports', 'reports') + return os.path.join(self.output_base_dir, reports) + + @property + def trees_dir(self) -> str: + """获取树文件目录""" + trees = self.config.get('paths', {}).get('output', {}).get('trees', 'trees') + return os.path.join(self.output_base_dir, trees) + + @property + def temp_dir(self) -> str: + """获取临时文件目录""" + temp = self.config.get('paths', {}).get('output', {}).get('temp', 'temp') + return os.path.join(self.output_base_dir, temp) + + @property + def supported_formats(self) -> List[str]: + """获取支持的输出格式""" + return self.config.get('output', {}).get('formats', ['md']) + + @property + def default_format(self) -> str: + """获取默认输出格式""" + return self.config.get('output', {}).get('default_format', 'md') + + def get_output_file(self, format_type: str) -> str: + """获取指定格式的输出文件名""" + files = self.config.get('output', {}).get('files', {}) + return files.get(format_type, f'analysis_result.{format_type}') + + def get_output_path(self, filename: str, output_type: str = 'reports') -> str: + """ + 获取输出文件的完整路径 + :param filename: 文件名 + :param output_type: 输出类型(reports/trees/temp) + :return: 完整路径 + """ + if output_type == 'reports': + base_dir = self.reports_dir + elif output_type == 'trees': + base_dir = self.trees_dir + elif output_type == 'temp': + base_dir = self.temp_dir + else: + base_dir = self.output_base_dir + + return os.path.join(base_dir, filename) + + @property + def content_preview_length(self) -> 
int: + """获取内容预览长度""" + return self.config.get('content', {}).get('preview_length', 1000) + + @property + def supported_sources(self) -> List[str]: + """获取支持的输入源类型""" + return self.config.get('input', {}).get('supported_sources', ['local']) + + @property + def default_source(self) -> str: + """获取默认输入源类型""" + return self.config.get('input', {}).get('default_source', 'local') + + def validate_format(self, format_type: str) -> bool: + """验证输出格式是否支持""" + return format_type in self.supported_formats + + def validate_source(self, source_type: str) -> bool: + """验证输入源类型是否支持""" + return source_type in self.supported_sources diff --git a/smart_analysis.py b/smart_analysis.py new file mode 100644 index 00000000..9683a9fb --- /dev/null +++ b/smart_analysis.py @@ -0,0 +1,248 @@ +import os +import subprocess +import json +import argparse +from typing import List, Tuple, Dict +from gitingest.ingest import ingest +from config_manager import ConfigManager + +# 加载配置 +config = ConfigManager() + +def generate_tree(directory: str, max_depth: int = None) -> str: + """ + 生成目录树结构 + :param directory: 要分析的目录路径 + :param max_depth: 树的最大深度 + :return: 目录树的字符串表示 + """ + if max_depth is None: + max_depth = config.tree_max_depth + + try: + result = subprocess.run( + ['tree', '-L', str(max_depth)], + cwd=directory, + capture_output=True, + text=True + ) + return result.stdout + except Exception as e: + return f"生成目录树时出错: {str(e)}" + +def analyze_tree_and_suggest_patterns(tree_output: str) -> Tuple[List[str], List[str]]: + """ + 分析目录树并建议包含和排除模式 + :param tree_output: 目录树字符串 + :return: (包含模式列表, 排除模式列表) + """ + # 针对markdown文档的包含模式 + include_patterns = [ + # 核心源代码 + "**/*.py", # Python源代码 + + # 文档和配置 + "README.md", # 主要文档 + "CHANGELOG.md", # 变更日志 + "LICENSE", # 许可证 + "requirements.txt", # Python依赖 + "pyproject.toml", # Python项目配置 + "setup.py", # 安装配置 + "setup.cfg", # 安装配置 + "MANIFEST.in", # 打包配置 + + # 核心文档(选择性包含) + "docs/**/*.md", # 文档目录下的markdown文件 + ] + + # 排除模式 - 更细致的控制 + exclude_patterns = [ + # 
二进制和生成文件 + "**/*.pyc", # Python编译文件 + "**/__pycache__/**", # Python缓存 + "**/*.so", # 编译的扩展模块 + "**/*.pyd", # Windows下的Python扩展模块 + "**/*.dll", # Windows动态链接库 + "**/*.dylib", # Mac动态链接库 + "**/*.egg", # Python打包文件 + "**/*.whl", # Python wheel包 + "**/*.exe", # 可执行文件 + + # 媒体文件 + "**/*.png", # PNG图片 + "**/*.jpg", # JPG图片 + "**/*.jpeg", # JPEG图片 + "**/*.gif", # GIF图片 + "**/*.ico", # 图标文件 + "**/*.svg", # SVG图片 + "**/*.mp4", # 视频文件 + "**/*.mov", # 视频文件 + "**/*.avi", # 视频文件 + "**/*.mp3", # 音频文件 + "**/*.wav", # 音频文件 + + # 开发工具和临时文件 + "**/.git/**", # Git目录 + "**/.idea/**", # PyCharm配置 + "**/.vscode/**", # VSCode配置 + "**/.env", # 环境变量 + "**/.env.*", # 环境变量文件 + "**/node_modules/**", # Node.js模块 + "**/venv/**", # Python虚拟环境 + "**/env/**", # Python虚拟环境 + "**/build/**", # 构建目录 + "**/dist/**", # 分发目录 + "**/.pytest_cache/**", # Pytest缓存 + "**/.coverage", # 测试覆盖率文件 + "**/htmlcov/**", # 测试覆盖率报告 + + # 编译和打包相关 + "**/*.min.js", # 压缩的JS文件 + "**/*.min.css", # 压缩的CSS文件 + "**/*.map", # Source map文件 + "**/webpack.stats.json", # Webpack统计文件 + + # UI构建文件 + "**/ui/**/*.js", # UI JavaScript文件 + "**/ui/**/*.css", # UI样式文件 + "**/ui/build/**", # UI构建输出 + "**/ui/dist/**", # UI分发文件 + + # 测试文件(可选,取决于是否需要包含测试文档) + "**/tests/**", # 测试目录 + "**/test_*.py", # 测试文件 + "**/*_test.py", # 测试文件 + + # Jupyter notebooks(可选) + "**/*.ipynb", # Jupyter笔记本 + "**/.ipynb_checkpoints/**", # Jupyter检查点 + ] + + return include_patterns, exclude_patterns + +def smart_ingest(directory: str, max_file_size: int = None, output_format: str = None) -> Dict: + """ + 智能分析目录并生成报告 + :param directory: 要分析的目录路径 + :param max_file_size: 最大文件大小限制 + :param output_format: 输出格式(md/json/txt) + :return: 分析报告字典 + """ + if max_file_size is None: + max_file_size = config.max_file_size + if output_format is None: + output_format = config.default_format + + if not config.validate_format(output_format): + raise ValueError(f"不支持的输出格式: {output_format}") + + # 步骤1:生成目录树 + print("步骤1: 生成目录树...") + tree_output = generate_tree(directory) + 
print(tree_output) + + # 保存目录树 + tree_file = config.get_output_path(config.get_output_file('tree'), 'trees') + with open(tree_file, 'w', encoding=config.file_encoding) as f: + f.write(tree_output) + + # 步骤2:分析树结构并建议过滤模式 + print("\n步骤2: 分析目录结构并生成建议...") + include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output) + + print("建议的包含模式:") + for pattern in include_patterns: + print(f" - {pattern}") + + print("\n建议的排除模式:") + for pattern in exclude_patterns: + print(f" - {pattern}") + + # 步骤3:执行ingest + print("\n步骤3: 执行文件分析...") + try: + summary, tree, content = ingest( + source=directory, + max_file_size=max_file_size, + include_patterns=include_patterns, + exclude_patterns=exclude_patterns, + output=config.get_output_path(config.get_output_file(output_format), 'reports') + ) + + # 返回完整报告 + report = { + "directory_tree": tree_output, + "suggested_patterns": { + "include": include_patterns, + "exclude": exclude_patterns + }, + "analysis_result": { + "summary": summary, + "tree": tree, + "content": content[:config.content_preview_length] + "..." 
+ if len(content) > config.content_preview_length else content + } + } + + # 保存JSON报告 + json_file = config.get_output_path(config.get_output_file('json'), 'reports') + with open(json_file, "w", encoding=config.file_encoding) as f: + json.dump(report, f, ensure_ascii=False, indent=2) + + return report + + except Exception as e: + return { + "error": f"分析过程中出错: {str(e)}", + "directory_tree": tree_output, + "suggested_patterns": { + "include": include_patterns, + "exclude": exclude_patterns + } + } + +if __name__ == "__main__": + # 配置命令行参数 + parser = argparse.ArgumentParser(description="智能代码仓库分析工具") + parser.add_argument("--source", "-s", type=str, + default=config.input_base_dir, + help="要分析的源目录路径") + parser.add_argument("--source-type", "-t", type=str, + choices=config.supported_sources, + default=config.default_source, + help="输入源类型") + parser.add_argument("--max-depth", "-d", type=int, + default=config.tree_max_depth, + help="目录树最大深度") + parser.add_argument("--max-size", "-m", type=int, + default=config.max_file_size, + help="最大文件大小(bytes)") + parser.add_argument("--output-dir", "-o", type=str, + default=config.output_base_dir, + help="输出基础目录") + parser.add_argument("--format", "-f", type=str, + choices=config.supported_formats, + default=config.default_format, + help="输出格式") + + args = parser.parse_args() + + print(f"开始分析目录: {args.source}") + print(f"配置信息:") + print(f"- 输入源类型: {args.source_type}") + print(f"- 目录树深度: {args.max_depth}") + print(f"- 最大文件大小: {args.max_size / 1024 / 1024:.2f}MB") + print(f"- 输出基础目录: {args.output_dir}") + print(f"- 输出格式: {args.format}") + + # 执行分析 + result = smart_ingest( + directory=args.source, + max_file_size=args.max_size, + output_format=args.format + ) + + print(f"\n分析完成!输出文件:") + print(f"1. 目录树: {config.get_output_path(config.get_output_file('tree'), 'trees')}") + print(f"2. 分析报告: {config.get_output_path(config.get_output_file(args.format), 'reports')}") + print(f"3. 
JSON报告: {config.get_output_path(config.get_output_file('json'), 'reports')}") From 9eed1e5d529185b0b7edb24c5b7ef03f5094d1cd Mon Sep 17 00:00:00 2001 From: Filip Christiansen <22807962+filipchristiansen@users.noreply.github.com> Date: Wed, 1 Jan 2025 20:01:15 +0100 Subject: [PATCH 2/2] Adjustments clark874's changes to English, add docstrings, and fixes --- config.yaml | 32 ++-- config_manager.py | 142 ---------------- smart_analysis.py | 386 ++++++++++++++++++++++-------------------- src/config_manager.py | 312 ++++++++++++++++++++++++++++++++++ 4 files changed, 529 insertions(+), 343 deletions(-) delete mode 100644 config_manager.py create mode 100644 src/config_manager.py diff --git a/config.yaml b/config.yaml index ff70283f..f362f782 100644 --- a/config.yaml +++ b/config.yaml @@ -1,4 +1,4 @@ -# 输入配置 +# Input configuration input: supported_sources: - local @@ -6,28 +6,28 @@ input: - gitlab default_source: local -# 路径配置 +# Path configuration paths: input: - base_dir: . # 默认使用当前目录 - github: "" # 默认GitHub仓库 - gitlab: "" # 默认GitLab仓库 + base_dir: . 
# Use the current directory by default + github: "" # Default GitHub repository + gitlab: "" # Default GitLab repository output: - base_dir: output # 输出基础目录 - reports: reports # 报告目录 - trees: trees # 目录树文件 - temp: temp # 临时文件 + base_dir: output # Output base directory + reports: reports # Report directory + trees: trees # Directory tree file + temp: temp # Temporary file -# 树形结构配置 +# Tree structure configuration tree: - max_depth: 4 # 目录树最大深度 + max_depth: 4 # Maximum depth of directory tree -# 文件配置 +# File configuration file: max_size: 10485760 # 10MB - encoding: utf-8 # 文件编码 + encoding: utf-8 # File encoding -# 输出配置 +# Output configuration output: formats: - md @@ -40,6 +40,6 @@ output: txt: analysis_report.txt tree: directory_tree.txt -# 内容配置 +# Content configuration content: - preview_length: 1000 # 内容预览长度(字符数) + preview_length: 1000 # Content preview length (number of characters) diff --git a/config_manager.py b/config_manager.py deleted file mode 100644 index b563ce39..00000000 --- a/config_manager.py +++ /dev/null @@ -1,142 +0,0 @@ -import os -import yaml -from typing import Dict, List, Optional - -class ConfigManager: - """配置管理器:负责加载和管理配置""" - - def __init__(self, config_file: str = "config.yaml"): - self.config_file = config_file - self.config = self._load_config() - self._init_directories() - - def _load_config(self) -> Dict: - """加载配置文件""" - try: - with open(self.config_file, 'r', encoding='utf-8') as f: - return yaml.safe_load(f) - except Exception as e: - print(f"加载配置文件失败: {str(e)}") - return {} - - def _init_directories(self): - """初始化所有必要的目录""" - # 创建输出基础目录 - os.makedirs(self.output_base_dir, exist_ok=True) - # 创建报告目录 - os.makedirs(self.reports_dir, exist_ok=True) - # 创建树文件目录 - os.makedirs(self.trees_dir, exist_ok=True) - # 创建临时文件目录 - os.makedirs(self.temp_dir, exist_ok=True) - - @property - def tree_max_depth(self) -> int: - """获取目录树最大深度""" - return self.config.get('tree', {}).get('max_depth', 4) - - @property - def max_file_size(self) -> int: - 
"""获取最大文件大小""" - return self.config.get('file', {}).get('max_size', 10 * 1024 * 1024) - - @property - def file_encoding(self) -> str: - """获取文件编码""" - return self.config.get('file', {}).get('encoding', 'utf-8') - - # 输入路径相关 - @property - def input_base_dir(self) -> str: - """获取输入基础目录""" - return self.config.get('paths', {}).get('input', {}).get('base_dir', os.getcwd()) - - @property - def github_repo(self) -> str: - """获取默认GitHub仓库地址""" - return self.config.get('paths', {}).get('input', {}).get('github', '') - - @property - def gitlab_repo(self) -> str: - """获取默认GitLab仓库地址""" - return self.config.get('paths', {}).get('input', {}).get('gitlab', '') - - # 输出路径相关 - @property - def output_base_dir(self) -> str: - """获取输出基础目录""" - return self.config.get('paths', {}).get('output', {}).get('base_dir', 'output') - - @property - def reports_dir(self) -> str: - """获取报告目录""" - reports = self.config.get('paths', {}).get('output', {}).get('reports', 'reports') - return os.path.join(self.output_base_dir, reports) - - @property - def trees_dir(self) -> str: - """获取树文件目录""" - trees = self.config.get('paths', {}).get('output', {}).get('trees', 'trees') - return os.path.join(self.output_base_dir, trees) - - @property - def temp_dir(self) -> str: - """获取临时文件目录""" - temp = self.config.get('paths', {}).get('output', {}).get('temp', 'temp') - return os.path.join(self.output_base_dir, temp) - - @property - def supported_formats(self) -> List[str]: - """获取支持的输出格式""" - return self.config.get('output', {}).get('formats', ['md']) - - @property - def default_format(self) -> str: - """获取默认输出格式""" - return self.config.get('output', {}).get('default_format', 'md') - - def get_output_file(self, format_type: str) -> str: - """获取指定格式的输出文件名""" - files = self.config.get('output', {}).get('files', {}) - return files.get(format_type, f'analysis_result.{format_type}') - - def get_output_path(self, filename: str, output_type: str = 'reports') -> str: - """ - 获取输出文件的完整路径 - :param filename: 文件名 - :param 
output_type: 输出类型(reports/trees/temp) - :return: 完整路径 - """ - if output_type == 'reports': - base_dir = self.reports_dir - elif output_type == 'trees': - base_dir = self.trees_dir - elif output_type == 'temp': - base_dir = self.temp_dir - else: - base_dir = self.output_base_dir - - return os.path.join(base_dir, filename) - - @property - def content_preview_length(self) -> int: - """获取内容预览长度""" - return self.config.get('content', {}).get('preview_length', 1000) - - @property - def supported_sources(self) -> List[str]: - """获取支持的输入源类型""" - return self.config.get('input', {}).get('supported_sources', ['local']) - - @property - def default_source(self) -> str: - """获取默认输入源类型""" - return self.config.get('input', {}).get('default_source', 'local') - - def validate_format(self, format_type: str) -> bool: - """验证输出格式是否支持""" - return format_type in self.supported_formats - - def validate_source(self, source_type: str) -> bool: - """验证输入源类型是否支持""" - return source_type in self.supported_sources diff --git a/smart_analysis.py b/smart_analysis.py index 9683a9fb..61479973 100644 --- a/smart_analysis.py +++ b/smart_analysis.py @@ -1,248 +1,264 @@ -import os -import subprocess -import json import argparse -from typing import List, Tuple, Dict -from gitingest.ingest import ingest +import json +import subprocess +from typing import Any + from config_manager import ConfigManager +from gitingest.ingest import ingest -# 加载配置 +# Load configuration config = ConfigManager() -def generate_tree(directory: str, max_depth: int = None) -> str: + +def generate_tree(directory: str, max_depth: int | None = None) -> str: """ - 生成目录树结构 - :param directory: 要分析的目录路径 - :param max_depth: 树的最大深度 - :return: 目录树的字符串表示 + Generate a directory tree structure + + Parameters + ---------- + directory : str + Directory path to analyze. + max_depth : int | None + Maximum depth of the tree, by default None. + + Returns + ------- + str + String representation of the directory tree. 
""" if max_depth is None: max_depth = config.tree_max_depth - + try: - result = subprocess.run( - ['tree', '-L', str(max_depth)], - cwd=directory, - capture_output=True, - text=True - ) + result = subprocess.run(["tree", "-L", str(max_depth)], cwd=directory, capture_output=True, text=True) return result.stdout except Exception as e: - return f"生成目录树时出错: {str(e)}" + return f"Failed to generate tree: {e}" -def analyze_tree_and_suggest_patterns(tree_output: str) -> Tuple[List[str], List[str]]: + +def analyze_tree_and_suggest_patterns(tree_output: str) -> tuple[list[str], list[str]]: """ - 分析目录树并建议包含和排除模式 - :param tree_output: 目录树字符串 - :return: (包含模式列表, 排除模式列表) + Analyze the directory tree and suggest include/exclude patterns + + Parameters + ---------- + tree_output : str + String representation of the directory tree. + + Returns + ------- + tuple[list[str], list[str]] + List of include patterns and list of exclude patterns. """ - # 针对markdown文档的包含模式 + # include_patterns = [ - # 核心源代码 - "**/*.py", # Python源代码 - - # 文档和配置 - "README.md", # 主要文档 - "CHANGELOG.md", # 变更日志 - "LICENSE", # 许可证 - "requirements.txt", # Python依赖 - "pyproject.toml", # Python项目配置 - "setup.py", # 安装配置 - "setup.cfg", # 安装配置 - "MANIFEST.in", # 打包配置 - - # 核心文档(选择性包含) - "docs/**/*.md", # 文档目录下的markdown文件 + "**/*.py", + "README.md", + "CHANGELOG.md", + "LICENSE", + "requirements.txt", + "pyproject.toml", + "setup.py", + "setup.cfg", + "MANIFEST.in", + "docs/**/*.md", ] - - # 排除模式 - 更细致的控制 + exclude_patterns = [ - # 二进制和生成文件 - "**/*.pyc", # Python编译文件 - "**/__pycache__/**", # Python缓存 - "**/*.so", # 编译的扩展模块 - "**/*.pyd", # Windows下的Python扩展模块 - "**/*.dll", # Windows动态链接库 - "**/*.dylib", # Mac动态链接库 - "**/*.egg", # Python打包文件 - "**/*.whl", # Python wheel包 - "**/*.exe", # 可执行文件 - - # 媒体文件 - "**/*.png", # PNG图片 - "**/*.jpg", # JPG图片 - "**/*.jpeg", # JPEG图片 - "**/*.gif", # GIF图片 - "**/*.ico", # 图标文件 - "**/*.svg", # SVG图片 - "**/*.mp4", # 视频文件 - "**/*.mov", # 视频文件 - "**/*.avi", # 视频文件 - "**/*.mp3", # 音频文件 - 
"**/*.wav", # 音频文件 - - # 开发工具和临时文件 - "**/.git/**", # Git目录 - "**/.idea/**", # PyCharm配置 - "**/.vscode/**", # VSCode配置 - "**/.env", # 环境变量 - "**/.env.*", # 环境变量文件 - "**/node_modules/**", # Node.js模块 - "**/venv/**", # Python虚拟环境 - "**/env/**", # Python虚拟环境 - "**/build/**", # 构建目录 - "**/dist/**", # 分发目录 - "**/.pytest_cache/**", # Pytest缓存 - "**/.coverage", # 测试覆盖率文件 - "**/htmlcov/**", # 测试覆盖率报告 - - # 编译和打包相关 - "**/*.min.js", # 压缩的JS文件 - "**/*.min.css", # 压缩的CSS文件 - "**/*.map", # Source map文件 - "**/webpack.stats.json", # Webpack统计文件 - - # UI构建文件 - "**/ui/**/*.js", # UI JavaScript文件 - "**/ui/**/*.css", # UI样式文件 - "**/ui/build/**", # UI构建输出 - "**/ui/dist/**", # UI分发文件 - - # 测试文件(可选,取决于是否需要包含测试文档) - "**/tests/**", # 测试目录 - "**/test_*.py", # 测试文件 - "**/*_test.py", # 测试文件 - - # Jupyter notebooks(可选) - "**/*.ipynb", # Jupyter笔记本 - "**/.ipynb_checkpoints/**", # Jupyter检查点 + "**/*.pyc", + "**/__pycache__/**", + "**/*.so" "**/*.pyd", + "**/*.dll", + "**/*.dylib", + "**/*.egg", + "**/*.whl", + "**/*.exe", + "**/*.png", + "**/*.jpg", + "**/*.jpeg", + "**/*.gif", + "**/*.ico", + "**/*.svg", + "**/*.mp4", + "**/*.mov", + "**/*.avi", + "**/*.mp3", + "**/*.wav", + "**/.git/**", + "**/.idea/**", + "**/.vscode/**", + "**/.env", + "**/.env.*", + "**/node_modules/**", + "**/venv/**", + "**/env/**", + "**/build/**", + "**/dist/**", + "**/.pytest_cache/**", + "**/.coverage", + "**/htmlcov/**", + "**/*.min.js", + "**/*.min.css", + "**/*.map", + "**/webpack.stats.json", + "**/ui/**/*.js", + "**/ui/**/*.css", + "**/ui/build/**", + "**/ui/dist/**", + "**/tests/**", + "**/test_*.py", + "**/*_test.py", + "**/*.ipynb", + "**/.ipynb_checkpoints/**", ] - + return include_patterns, exclude_patterns -def smart_ingest(directory: str, max_file_size: int = None, output_format: str = None) -> Dict: + +def smart_ingest(directory: str, max_file_size: int | None = None, output_format: str | None = None) -> dict[str, Any]: """ - 智能分析目录并生成报告 - :param directory: 要分析的目录路径 - :param max_file_size: 最大文件大小限制 - 
:param output_format: 输出格式(md/json/txt) - :return: 分析报告字典 + Perform smart ingest analysis on the given directory and return the report. + + Parameters + ---------- + directory : str + Directory path to analyze. + max_file_size : int + Maximum file size to analyze, by default None. + output_format : str + Output format of the report, by default None. + + Returns + ------- + dict[str, Any] + Report of the analysis. + + Raises + ------ + ValueError + If the output format is not supported. """ if max_file_size is None: max_file_size = config.max_file_size if output_format is None: output_format = config.default_format - + if not config.validate_format(output_format): - raise ValueError(f"不支持的输出格式: {output_format}") - - # 步骤1:生成目录树 - print("步骤1: 生成目录树...") + raise ValueError(f"Unsupported output format: {output_format}") + + # Step 1: Generate directory tree + print("Step 1: Generating directory tree...", end="\n\n") tree_output = generate_tree(directory) print(tree_output) - - # 保存目录树 - tree_file = config.get_output_path(config.get_output_file('tree'), 'trees') - with open(tree_file, 'w', encoding=config.file_encoding) as f: + + # Save the tree output to a file + tree_file = config.get_output_path(config.get_output_file("tree"), "trees") + with open(tree_file, "w", encoding=config.file_encoding) as f: f.write(tree_output) - - # 步骤2:分析树结构并建议过滤模式 - print("\n步骤2: 分析目录结构并生成建议...") + + # Step 2: Analyze the directory structure and suggest filter patterns + print("Step 2: Analyzing directory structure and suggesting filter patterns...") include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output) - - print("建议的包含模式:") + + print("Suggested include patterns:", end="\n\n") for pattern in include_patterns: print(f" - {pattern}") - - print("\n建议的排除模式:") + + print("Suggested exclude patterns:", end="\n\n") for pattern in exclude_patterns: print(f" - {pattern}") - - # 步骤3:执行ingest - print("\n步骤3: 执行文件分析...") + + # Step 3: Execute file analysis + print("\nStep 
3: Executing file analysis...") try: summary, tree, content = ingest( source=directory, max_file_size=max_file_size, include_patterns=include_patterns, exclude_patterns=exclude_patterns, - output=config.get_output_path(config.get_output_file(output_format), 'reports') + output=config.get_output_path(config.get_output_file(output_format), "reports"), ) - - # 返回完整报告 + + # Report report = { "directory_tree": tree_output, "suggested_patterns": { "include": include_patterns, - "exclude": exclude_patterns + "exclude": exclude_patterns, }, "analysis_result": { "summary": summary, "tree": tree, - "content": content[:config.content_preview_length] + "..." - if len(content) > config.content_preview_length else content - } + "content": ( + content[: config.content_preview_length] + "..." + if len(content) > config.content_preview_length + else content + ), + }, } - - # 保存JSON报告 - json_file = config.get_output_path(config.get_output_file('json'), 'reports') + + # Save the report to a JSON file + json_file = config.get_output_path(config.get_output_file("json"), "reports") with open(json_file, "w", encoding=config.file_encoding) as f: json.dump(report, f, ensure_ascii=False, indent=2) - + return report - + except Exception as e: return { - "error": f"分析过程中出错: {str(e)}", + "error": f"分析过程中出错: {e}", "directory_tree": tree_output, "suggested_patterns": { "include": include_patterns, - "exclude": exclude_patterns - } + "exclude": exclude_patterns, + }, } -if __name__ == "__main__": - # 配置命令行参数 - parser = argparse.ArgumentParser(description="智能代码仓库分析工具") - parser.add_argument("--source", "-s", type=str, - default=config.input_base_dir, - help="要分析的源目录路径") - parser.add_argument("--source-type", "-t", type=str, - choices=config.supported_sources, - default=config.default_source, - help="输入源类型") - parser.add_argument("--max-depth", "-d", type=int, - default=config.tree_max_depth, - help="目录树最大深度") - parser.add_argument("--max-size", "-m", type=int, - default=config.max_file_size, - 
help="最大文件大小(bytes)") - parser.add_argument("--output-dir", "-o", type=str, - default=config.output_base_dir, - help="输出基础目录") - parser.add_argument("--format", "-f", type=str, - choices=config.supported_formats, - default=config.default_format, - help="输出格式") - - args = parser.parse_args() - - print(f"开始分析目录: {args.source}") - print(f"配置信息:") - print(f"- 输入源类型: {args.source_type}") - print(f"- 目录树深度: {args.max_depth}") - print(f"- 最大文件大小: {args.max_size / 1024 / 1024:.2f}MB") - print(f"- 输出基础目录: {args.output_dir}") - print(f"- 输出格式: {args.format}") - - # 执行分析 - result = smart_ingest( - directory=args.source, - max_file_size=args.max_size, - output_format=args.format + +def main() -> None: + # Parse command-line arguments + parser = argparse.ArgumentParser(description="Intelligent directory analysis tool") + parser.add_argument("--source", "-s", type=str, default=config.input_base_dir, help="Directory to analyze") + parser.add_argument( + "--source-type", + "-t", + type=str, + choices=config.supported_sources, + default=config.default_source, + help="Input source type", + ) + parser.add_argument("--max-depth", "-d", type=int, default=config.tree_max_depth, help="Maximum depth of the tree") + parser.add_argument( + "--max-size", "-m", type=int, default=config.max_file_size, help="Maximum file size to analyze" + ) + parser.add_argument("--output-dir", "-o", type=str, default=config.output_base_dir, help="Output base directory") + parser.add_argument( + "--format", + "-f", + type=str, + choices=config.supported_formats, + default=config.default_format, + help="Output format of the report", ) - - print(f"\n分析完成!输出文件:") - print(f"1. 目录树: {config.get_output_path(config.get_output_file('tree'), 'trees')}") - print(f"2. 分析报告: {config.get_output_path(config.get_output_file(args.format), 'reports')}") - print(f"3. 
class ConfigManager:
    """
    Load a YAML configuration file and expose its settings with safe defaults.

    Every accessor falls back to a built-in default when the file cannot be
    loaded, is empty, or when a section or key is absent or null, so a
    partially filled configuration file never breaks attribute access.
    """

    # Maps the ``output_type`` accepted by ``get_output_path`` to the name of
    # the property that resolves the matching directory.
    _OUTPUT_DIR_ATTRS = {
        "reports": "reports_dir",
        "trees": "trees_dir",
        "temp": "temp_dir",
    }

    def __init__(self, config_file: str = "config.yaml") -> None:
        """
        Load the configuration and create the output directories.

        Parameters
        ----------
        config_file : str, optional
            The configuration file path, by default "config.yaml".
        """
        self.config_file = config_file
        self.config = self._load_config()
        self._init_directories()

    def _load_config(self) -> dict[str, Any]:
        """
        Load configuration from the config file.

        Loading is best effort: a missing, unreadable, undecodable or
        malformed file is reported on stdout and replaced by an empty
        configuration, so every accessor returns its default.

        Returns
        -------
        dict[str, Any]
            The configuration dictionary; empty when nothing usable was loaded.
        """
        try:
            with open(self.config_file, encoding="utf-8") as f:
                loaded = yaml.safe_load(f)
        except (OSError, ValueError, yaml.YAMLError) as e:
            # ValueError also covers UnicodeDecodeError raised while reading.
            print(f"Failed to load configuration file: {e}")
            return {}

        if loaded is None:
            # An empty YAML document parses to None: use the defaults silently.
            return {}
        if not isinstance(loaded, dict):
            # A scalar or list at the top level cannot hold named sections.
            print("Failed to load configuration file: top-level YAML value is not a mapping")
            return {}
        return loaded

    def _lookup(self, *keys: str, default: Any = None) -> Any:
        """
        Walk nested configuration sections and return the value found.

        Parameters
        ----------
        *keys : str
            The successive keys leading to the wanted value.
        default : Any, optional
            The value returned when any key is missing, when an intermediate
            section is not a mapping, or when the stored value is null.

        Returns
        -------
        Any
            The configured value, or ``default``.
        """
        node: Any = self.config
        for key in keys:
            if not isinstance(node, dict):
                # A section written as `name:` with no children parses to None.
                return default
            node = node.get(key)
        return default if node is None else node

    def _init_directories(self) -> None:
        """
        Create the output base, reports, trees and temporary directories.

        Existing directories are left untouched.
        """
        for directory in (
            self.output_base_dir,
            self.reports_dir,
            self.trees_dir,
            self.temp_dir,
        ):
            os.makedirs(directory, exist_ok=True)

    @property
    def tree_max_depth(self) -> int:
        """
        Get the maximum depth of the tree.

        Returns
        -------
        int
            The maximum depth of the tree, 4 when not configured.
        """
        return self._lookup("tree", "max_depth", default=4)

    @property
    def max_file_size(self) -> int:
        """
        Get the maximum file size.

        Returns
        -------
        int
            The maximum file size, 10 * 1024 * 1024 when not configured.
        """
        return self._lookup("file", "max_size", default=10 * 1024 * 1024)

    @property
    def file_encoding(self) -> str:
        """
        Get the file encoding.

        Returns
        -------
        str
            The file encoding, "utf-8" when not configured.
        """
        return self._lookup("file", "encoding", default="utf-8")

    # Input path related
    @property
    def input_base_dir(self) -> str:
        """
        Get the input base directory.

        Returns
        -------
        str
            The input base directory, the current working directory when not
            configured.
        """
        return self._lookup("paths", "input", "base_dir", default=os.getcwd())

    @property
    def github_repo(self) -> str:
        """
        Get the default GitHub repository address.

        Returns
        -------
        str
            The default GitHub repository address, "" when not configured.
        """
        return self._lookup("paths", "input", "github", default="")

    @property
    def gitlab_repo(self) -> str:
        """
        Get the default GitLab repository address.

        Returns
        -------
        str
            The default GitLab repository address, "" when not configured.
        """
        return self._lookup("paths", "input", "gitlab", default="")

    # Output path related
    @property
    def output_base_dir(self) -> str:
        """
        Get the output base directory.

        Returns
        -------
        str
            The output base directory, "output" when not configured.
        """
        return self._lookup("paths", "output", "base_dir", default="output")

    @property
    def reports_dir(self) -> str:
        """
        Get the reports directory.

        Returns
        -------
        str
            The reports directory, joined onto the output base directory.
        """
        reports = self._lookup("paths", "output", "reports", default="reports")
        return os.path.join(self.output_base_dir, reports)

    @property
    def trees_dir(self) -> str:
        """
        Get the trees directory.

        Returns
        -------
        str
            The trees directory, joined onto the output base directory.
        """
        trees = self._lookup("paths", "output", "trees", default="trees")
        return os.path.join(self.output_base_dir, trees)

    @property
    def temp_dir(self) -> str:
        """
        Get the temporary directory.

        Returns
        -------
        str
            The temporary directory, joined onto the output base directory.
        """
        temp = self._lookup("paths", "output", "temp", default="temp")
        return os.path.join(self.output_base_dir, temp)

    @property
    def supported_formats(self) -> list[str]:
        """
        Get the supported output formats.

        Returns
        -------
        list[str]
            The supported output formats, ["md"] when not configured.
        """
        return self._lookup("output", "formats", default=["md"])

    @property
    def default_format(self) -> str:
        """
        Get the default output format.

        Returns
        -------
        str
            The default output format, "md" when not configured.
        """
        return self._lookup("output", "default_format", default="md")

    def get_output_file(self, format_type: str) -> str:
        """
        Get the output file name based on the format type.

        Parameters
        ----------
        format_type : str
            The format type.

        Returns
        -------
        str
            The configured file name, or "analysis_result.<format_type>" when
            no name is configured for that format.
        """
        fallback = f"analysis_result.{format_type}"
        return self._lookup("output", "files", format_type, default=fallback)

    def get_output_path(self, filename: str, output_type: str = "reports") -> str:
        """
        Get the full output path based on the filename and output type.

        Parameters
        ----------
        filename : str
            The filename to be used.
        output_type : str, optional
            The type of output (reports, trees, temp), by default "reports".
            Any other value selects the output base directory.

        Returns
        -------
        str
            The full output path.
        """
        dir_attr = self._OUTPUT_DIR_ATTRS.get(output_type, "output_base_dir")
        return os.path.join(getattr(self, dir_attr), filename)

    @property
    def content_preview_length(self) -> int:
        """
        Get the content preview length.

        Returns
        -------
        int
            The content preview length, 1000 when not configured.
        """
        return self._lookup("content", "preview_length", default=1000)

    @property
    def supported_sources(self) -> list[str]:
        """
        Get the supported input sources.

        Returns
        -------
        list[str]
            The supported input sources, ["local"] when not configured.
        """
        return self._lookup("input", "supported_sources", default=["local"])

    @property
    def default_source(self) -> str:
        """
        Get the default input source type.

        Returns
        -------
        str
            The default input source type, "local" when not configured.
        """
        return self._lookup("input", "default_source", default="local")

    def validate_format(self, format_type: str) -> bool:
        """
        Validate if the output format is supported.

        Parameters
        ----------
        format_type : str
            The output format type.

        Returns
        -------
        bool
            True if the format is supported, False otherwise.
        """
        return format_type in self.supported_formats

    def validate_source(self, source_type: str) -> bool:
        """
        Validate if the input source is supported.

        Parameters
        ----------
        source_type : str
            The input source type.

        Returns
        -------
        bool
            True if the source is supported, False otherwise.
        """
        return source_type in self.supported_sources