From b8439e352ed7fd2af8c30f9ef2aab42320b661a6 Mon Sep 17 00:00:00 2001
From: clark874 <86122040+clark874@users.noreply.github.com>
Date: Wed, 1 Jan 2025 20:00:06 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=E6=99=BA=E8=83=BD?=
 =?UTF-8?q?=E4=BB=A3=E7=A0=81=E4=BB=93=E5=BA=93=E5=88=86=E6=9E=90=E5=B7=A5?=
 =?UTF-8?q?=E5=85=B7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. 添加配置文件管理系统
2. 添加智能分析工具
3. 优化输出路径管理
---
 config.yaml       |  45 +++++++
 config_manager.py | 185 +++++++++++++++++++++++++++++
 smart_analysis.py | 291 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 521 insertions(+)
 create mode 100644 config.yaml
 create mode 100644 config_manager.py
 create mode 100644 smart_analysis.py

diff --git a/config.yaml b/config.yaml
new file mode 100644
index 00000000..ff70283f
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,45 @@
+# Input configuration
+input:
+  supported_sources:
+    - local
+    - github
+    - gitlab
+  default_source: local
+
+# Path configuration
+paths:
+  input:
+    base_dir: .                    # Defaults to the current directory
+    github: ""                     # Default GitHub repository
+    gitlab: ""                     # Default GitLab repository
+  output:
+    base_dir: output               # Output base directory
+    reports: reports               # Report directory
+    trees: trees                   # Directory tree files
+    temp: temp                     # Temporary files
+
+# Tree structure configuration
+tree:
+  max_depth: 4                     # Maximum depth of the directory tree
+
+# File configuration
+file:
+  max_size: 10485760               # 10MB
+  encoding: utf-8                  # File encoding
+
+# Output configuration
+output:
+  formats:
+    - md
+    - json
+    - txt
+  default_format: md
+  files:
+    md: analysis_report.md
+    json: analysis_report.json
+    txt: analysis_report.txt
+    tree: directory_tree.txt
+
+# Content configuration
+content:
+  preview_length: 1000             # Content preview length (characters)
diff --git a/config_manager.py b/config_manager.py
new file mode 100644
index 00000000..b563ce39
--- /dev/null
+++ b/config_manager.py
@@ -0,0 +1,185 @@
+import os
+import yaml
+from typing import Any, Dict, List, Optional
+
+
+class ConfigManager:
+    """Load a YAML configuration file and expose its settings as properties.
+
+    Missing sections or keys fall back to built-in defaults, so the manager
+    stays usable when the file is absent, empty or incomplete. Creating an
+    instance also creates the configured output directories.
+    """
+
+    def __init__(self, config_file: str = "config.yaml"):
+        self.config_file = config_file
+        self.config = self._load_config()
+        self._init_directories()
+
+    def _load_config(self) -> Dict:
+        """Read and parse the configuration file.
+
+        :return: the parsed mapping, or ``{}`` when the file cannot be read or
+            parsed, is empty, or does not hold a mapping at the top level
+        """
+        try:
+            with open(self.config_file, 'r', encoding='utf-8') as f:
+                # safe_load returns None for an empty document; every property
+                # calls .get() on the result, so normalise it to a dict.
+                loaded = yaml.safe_load(f) or {}
+        except (OSError, UnicodeDecodeError, yaml.YAMLError) as e:
+            print(f"加载配置文件失败: {str(e)}")
+            return {}
+        if not isinstance(loaded, dict):
+            print(f"加载配置文件失败: 顶层结构不是映射 ({type(loaded).__name__})")
+            return {}
+        return loaded
+
+    def _lookup(self, *keys: str, default: Any = None) -> Any:
+        """Walk nested mappings along ``keys``.
+
+        :return: the value found, or ``default`` when a key is missing, an
+            intermediate node is not a mapping, or the value is ``None``
+        """
+        node: Any = self.config
+        for key in keys:
+            if not isinstance(node, dict) or key not in node:
+                return default
+            node = node[key]
+        return default if node is None else node
+
+    def _init_directories(self) -> None:
+        """Create the output base directory and its sub-directories."""
+        for path in (self.output_base_dir, self.reports_dir,
+                     self.trees_dir, self.temp_dir):
+            os.makedirs(path, exist_ok=True)
+
+    def set_output_base_dir(self, base_dir: str) -> None:
+        """Override the output base directory and create its sub-directories.
+
+        :param base_dir: new root for reports, trees and temporary files
+        """
+        paths = self.config.get('paths')
+        if not isinstance(paths, dict):
+            paths = self.config['paths'] = {}
+        output = paths.get('output')
+        if not isinstance(output, dict):
+            output = paths['output'] = {}
+        output['base_dir'] = base_dir
+        self._init_directories()
+
+    @property
+    def tree_max_depth(self) -> int:
+        """Maximum depth of the directory tree (default 4)."""
+        return self._lookup('tree', 'max_depth', default=4)
+
+    @property
+    def max_file_size(self) -> int:
+        """Maximum file size in bytes (default 10 MiB)."""
+        return self._lookup('file', 'max_size', default=10 * 1024 * 1024)
+
+    @property
+    def file_encoding(self) -> str:
+        """File encoding (default ``utf-8``)."""
+        return self._lookup('file', 'encoding', default='utf-8')
+
+    # Input paths
+    @property
+    def input_base_dir(self) -> str:
+        """Base input directory (default: the current working directory)."""
+        return self._lookup('paths', 'input', 'base_dir', default=os.getcwd())
+
+    @property
+    def github_repo(self) -> str:
+        """Default GitHub repository address (default: empty string)."""
+        return self._lookup('paths', 'input', 'github', default='')
+
+    @property
+    def gitlab_repo(self) -> str:
+        """Default GitLab repository address (default: empty string)."""
+        return self._lookup('paths', 'input', 'gitlab', default='')
+
+    # Output paths
+    @property
+    def output_base_dir(self) -> str:
+        """Base output directory (default ``output``)."""
+        return self._lookup('paths', 'output', 'base_dir', default='output')
+
+    @property
+    def reports_dir(self) -> str:
+        """Report directory, located under the output base directory."""
+        reports = self._lookup('paths', 'output', 'reports', default='reports')
+        return os.path.join(self.output_base_dir, reports)
+
+    @property
+    def trees_dir(self) -> str:
+        """Directory-tree file directory, under the output base directory."""
+        trees = self._lookup('paths', 'output', 'trees', default='trees')
+        return os.path.join(self.output_base_dir, trees)
+
+    @property
+    def temp_dir(self) -> str:
+        """Temporary file directory, under the output base directory."""
+        temp = self._lookup('paths', 'output', 'temp', default='temp')
+        return os.path.join(self.output_base_dir, temp)
+
+    @property
+    def supported_formats(self) -> List[str]:
+        """Supported output formats (default ``['md']``)."""
+        return self._lookup('output', 'formats', default=['md'])
+
+    @property
+    def default_format(self) -> str:
+        """Default output format (default ``md``)."""
+        return self._lookup('output', 'default_format', default='md')
+
+    def get_output_file(self, format_type: str) -> str:
+        """Return the output file name configured for ``format_type``.
+
+        :param format_type: key under ``output.files`` (e.g. ``md``, ``tree``)
+        :return: the configured name, or ``analysis_result.<format_type>``
+        """
+        files = self._lookup('output', 'files', default={})
+        return files.get(format_type, f'analysis_result.{format_type}')
+
+    def get_output_path(self, filename: str, output_type: str = 'reports') -> str:
+        """Return the full path of an output file.
+
+        :param filename: file name
+        :param output_type: output kind (``reports``/``trees``/``temp``); any
+            other value selects the output base directory itself
+        :return: ``filename`` joined onto the selected directory
+        """
+        if output_type == 'reports':
+            base_dir = self.reports_dir
+        elif output_type == 'trees':
+            base_dir = self.trees_dir
+        elif output_type == 'temp':
+            base_dir = self.temp_dir
+        else:
+            base_dir = self.output_base_dir
+
+        return os.path.join(base_dir, filename)
+
+    @property
+    def content_preview_length(self) -> int:
+        """Content preview length in characters (default 1000)."""
+        return self._lookup('content', 'preview_length', default=1000)
+
+    @property
+    def supported_sources(self) -> List[str]:
+        """Supported input source types (default ``['local']``)."""
+        return self._lookup('input', 'supported_sources', default=['local'])
+
+    @property
+    def default_source(self) -> str:
+        """Default input source type (default ``local``)."""
+        return self._lookup('input', 'default_source', default='local')
+
+    def validate_format(self, format_type: str) -> bool:
+        """Return whether ``format_type`` is a supported output format."""
+        return format_type in self.supported_formats
+
+    def validate_source(self, source_type: str) -> bool:
+        """Return whether ``source_type`` is a supported input source type."""
+        return source_type in self.supported_sources
diff --git a/smart_analysis.py b/smart_analysis.py
new file mode 100644
index 00000000..9683a9fb
--- /dev/null
+++ b/smart_analysis.py
@@ -0,0 +1,291 @@
+import os
+import subprocess
+import json
+import argparse
+import sys
+from typing import List, Optional, Tuple, Dict
+from gitingest.ingest import ingest
+from config_manager import ConfigManager
+
+# Load configuration
+config = ConfigManager()
+
+
+def generate_tree(directory: str, max_depth: Optional[int] = None) -> str:
+    """Render the directory structure with the external ``tree`` command.
+
+    :param directory: path of the directory to analyse
+    :param max_depth: maximum tree depth; ``None`` uses ``config.tree_max_depth``
+    :return: the command's output, or an error message when it cannot be run
+    """
+    if max_depth is None:
+        max_depth = config.tree_max_depth
+
+    try:
+        result = subprocess.run(
+            ['tree', '-L', str(max_depth)],
+            cwd=directory,
+            capture_output=True,
+            text=True
+        )
+    except (OSError, ValueError, subprocess.SubprocessError) as e:
+        # OSError covers a missing ``tree`` binary or an unusable directory.
+        return f"生成目录树时出错: {str(e)}"
+
+    # A failed run can leave stdout empty; report stderr instead of silently
+    # returning an empty tree.
+    if result.returncode != 0 and not result.stdout:
+        return f"生成目录树时出错: {result.stderr.strip()}"
+    return result.stdout
+
+
+def analyze_tree_and_suggest_patterns(tree_output: str) -> Tuple[List[str], List[str]]:
+    """Suggest include and exclude patterns for the analysis.
+
+    :param tree_output: directory tree text; not inspected at present, the
+        same fixed pattern lists are returned for every input
+    :return: ``(include_patterns, exclude_patterns)``
+    """
+    # Include patterns aimed at markdown documents
+    include_patterns = [
+        # Core source code
+        "**/*.py",              # Python source code
+
+        # Documentation and configuration
+        "README.md",            # Main documentation
+        "CHANGELOG.md",         # Change log
+        "LICENSE",              # Licence
+        "requirements.txt",     # Python dependencies
+        "pyproject.toml",       # Python project configuration
+        "setup.py",             # Installation configuration
+        "setup.cfg",            # Installation configuration
+        "MANIFEST.in",          # Packaging configuration
+
+        # Core documentation (selectively included)
+        "docs/**/*.md",         # Markdown files under the docs directory
+    ]
+
+    # Exclude patterns - finer-grained control
+    exclude_patterns = [
+        # Binary and generated files
+        "**/*.pyc",             # Compiled Python files
+        "**/__pycache__/**",    # Python cache
+        "**/*.so",              # Compiled extension modules
+        "**/*.pyd",             # Python extension modules on Windows
+        "**/*.dll",             # Windows dynamic-link libraries
+        "**/*.dylib",           # macOS dynamic libraries
+        "**/*.egg",             # Python egg packages
+        "**/*.whl",             # Python wheel packages
+        "**/*.exe",             # Executables
+
+        # Media files
+        "**/*.png",             # PNG images
+        "**/*.jpg",             # JPG images
+        "**/*.jpeg",            # JPEG images
+        "**/*.gif",             # GIF images
+        "**/*.ico",             # Icon files
+        "**/*.svg",             # SVG images
+        "**/*.mp4",             # Video files
+        "**/*.mov",             # Video files
+        "**/*.avi",             # Video files
+        "**/*.mp3",             # Audio files
+        "**/*.wav",             # Audio files
+
+        # Development tools and temporary files
+        "**/.git/**",           # Git directory
+        "**/.idea/**",          # PyCharm settings
+        "**/.vscode/**",        # VSCode settings
+        "**/.env",              # Environment variables
+        "**/.env.*",            # Environment variable files
+        "**/node_modules/**",   # Node.js modules
+        "**/venv/**",           # Python virtual environment
+        "**/env/**",            # Python virtual environment
+        "**/build/**",          # Build directory
+        "**/dist/**",           # Distribution directory
+        "**/.pytest_cache/**",  # Pytest cache
+        "**/.coverage",         # Test coverage data file
+        "**/htmlcov/**",        # Test coverage report
+
+        # Build and bundling artefacts
+        "**/*.min.js",          # Minified JS files
+        "**/*.min.css",         # Minified CSS files
+        "**/*.map",             # Source map files
+        "**/webpack.stats.json",  # Webpack stats file
+
+        # UI build files
+        "**/ui/**/*.js",        # UI JavaScript files
+        "**/ui/**/*.css",       # UI style sheets
+        "**/ui/build/**",       # UI build output
+        "**/ui/dist/**",        # UI distribution files
+
+        # Test files (optional, depending on whether test docs are wanted)
+        "**/tests/**",          # Test directory
+        "**/test_*.py",         # Test files
+        "**/*_test.py",         # Test files
+
+        # Jupyter notebooks (optional)
+        "**/*.ipynb",           # Jupyter notebooks
+        "**/.ipynb_checkpoints/**",  # Jupyter checkpoints
+    ]
+
+    return include_patterns, exclude_patterns
+
+
+def smart_ingest(directory: str, max_file_size: Optional[int] = None,
+                 output_format: Optional[str] = None,
+                 max_depth: Optional[int] = None) -> Dict:
+    """Analyse a directory and write the tree file and the JSON report.
+
+    :param directory: path of the directory to analyse
+    :param max_file_size: maximum file size; ``None`` uses the configured value
+    :param output_format: output format (md/json/txt); ``None`` uses the
+        configured default
+    :param max_depth: directory tree depth; ``None`` uses the configured value
+    :return: the report dictionary; it carries an ``"error"`` key instead of
+        ``"analysis_result"`` when the ingest step fails
+    :raises ValueError: if ``output_format`` is not a supported format
+    """
+    if max_file_size is None:
+        max_file_size = config.max_file_size
+    if output_format is None:
+        output_format = config.default_format
+
+    if not config.validate_format(output_format):
+        raise ValueError(f"不支持的输出格式: {output_format}")
+
+    # Step 1: generate the directory tree
+    print("步骤1: 生成目录树...")
+    tree_output = generate_tree(directory, max_depth)
+    print(tree_output)
+
+    # Save the directory tree
+    tree_file = config.get_output_path(config.get_output_file('tree'), 'trees')
+    with open(tree_file, 'w', encoding=config.file_encoding) as f:
+        f.write(tree_output)
+
+    # Step 2: analyse the tree and suggest filter patterns
+    print("\n步骤2: 分析目录结构并生成建议...")
+    include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output)
+
+    print("建议的包含模式:")
+    for pattern in include_patterns:
+        print(f" - {pattern}")
+
+    print("\n建议的排除模式:")
+    for pattern in exclude_patterns:
+        print(f" - {pattern}")
+
+    # Step 3: run ingest
+    print("\n步骤3: 执行文件分析...")
+    try:
+        summary, tree, content = ingest(
+            source=directory,
+            max_file_size=max_file_size,
+            include_patterns=include_patterns,
+            exclude_patterns=exclude_patterns,
+            output=config.get_output_path(config.get_output_file(output_format), 'reports')
+        )
+
+        # Keep the report small: store only a preview of the content
+        preview_length = config.content_preview_length
+        if len(content) > preview_length:
+            content_preview = content[:preview_length] + "..."
+        else:
+            content_preview = content
+
+        # Assemble the complete report
+        report = {
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns
+            },
+            "analysis_result": {
+                "summary": summary,
+                "tree": tree,
+                "content": content_preview
+            }
+        }
+
+        # Save the JSON report
+        json_file = config.get_output_path(config.get_output_file('json'), 'reports')
+        with open(json_file, "w", encoding=config.file_encoding) as f:
+            json.dump(report, f, ensure_ascii=False, indent=2)
+
+        return report
+
+    except Exception as e:
+        return {
+            "error": f"分析过程中出错: {str(e)}",
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns
+            }
+        }
+
+
+def main() -> int:
+    """Parse the command line, run the analysis and list the output files.
+
+    :return: process exit status - 0 on success, 1 when the analysis failed
+    """
+    # Configure command-line arguments
+    parser = argparse.ArgumentParser(description="智能代码仓库分析工具")
+    parser.add_argument("--source", "-s", type=str,
+                        default=config.input_base_dir,
+                        help="要分析的源目录路径")
+    parser.add_argument("--source-type", "-t", type=str,
+                        choices=config.supported_sources,
+                        default=config.default_source,
+                        help="输入源类型")
+    parser.add_argument("--max-depth", "-d", type=int,
+                        default=config.tree_max_depth,
+                        help="目录树最大深度")
+    parser.add_argument("--max-size", "-m", type=int,
+                        default=config.max_file_size,
+                        help="最大文件大小(bytes)")
+    parser.add_argument("--output-dir", "-o", type=str,
+                        default=config.output_base_dir,
+                        help="输出基础目录")
+    parser.add_argument("--format", "-f", type=str,
+                        choices=config.supported_formats,
+                        default=config.default_format,
+                        help="输出格式")
+
+    args = parser.parse_args()
+
+    # Apply the --output-dir override before any output file is written
+    if args.output_dir != config.output_base_dir:
+        config.set_output_base_dir(args.output_dir)
+
+    print(f"开始分析目录: {args.source}")
+    print(f"配置信息:")
+    print(f"- 输入源类型: {args.source_type}")
+    print(f"- 目录树深度: {args.max_depth}")
+    print(f"- 最大文件大小: {args.max_size / 1024 / 1024:.2f}MB")
+    print(f"- 输出基础目录: {args.output_dir}")
+    print(f"- 输出格式: {args.format}")
+
+    # Run the analysis
+    result = smart_ingest(
+        directory=args.source,
+        max_file_size=args.max_size,
+        output_format=args.format,
+        max_depth=args.max_depth
+    )
+
+    # smart_ingest reports failures through an "error" key, not an exception
+    if "error" in result:
+        print(f"\n{result['error']}")
+        return 1
+
+    print(f"\n分析完成!输出文件:")
+    print(f"1. 目录树: {config.get_output_path(config.get_output_file('tree'), 'trees')}")
+    print(f"2. 分析报告: {config.get_output_path(config.get_output_file(args.format), 'reports')}")
+    print(f"3. JSON报告: {config.get_output_path(config.get_output_file('json'), 'reports')}")
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())