diff --git a/config.yaml b/config.yaml
new file mode 100644
index 00000000..f362f782
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,45 @@
+# Input configuration
+input:
+  supported_sources:
+    - local
+    - github
+    - gitlab
+  default_source: local
+
+# Path configuration
+paths:
+  input:
+    base_dir: .  # Use the current directory by default
+    github: ""  # Default GitHub repository
+    gitlab: ""  # Default GitLab repository
+  output:
+    base_dir: output  # Output base directory
+    reports: reports  # Report directory (inside base_dir)
+    trees: trees  # Directory for directory-tree files (inside base_dir)
+    temp: temp  # Directory for temporary files (inside base_dir)
+
+# Tree structure configuration
+tree:
+  max_depth: 4  # Maximum depth of directory tree
+
+# File configuration
+file:
+  max_size: 10485760  # 10MB
+  encoding: utf-8  # File encoding
+
+# Output configuration
+output:
+  formats:
+    - md
+    - json
+    - txt
+  default_format: md
+  files:
+    md: analysis_report.md
+    json: analysis_report.json
+    txt: analysis_report.txt
+    tree: directory_tree.txt
+
+# Content configuration
+content:
+  preview_length: 1000  # Content preview length (number of characters)
diff --git a/smart_analysis.py b/smart_analysis.py
new file mode 100644
index 00000000..61479973
--- /dev/null
+++ b/smart_analysis.py
@@ -0,0 +1,292 @@
+import argparse
+import json
+import subprocess
+from typing import Any
+
+from config_manager import ConfigManager
+from gitingest.ingest import ingest
+
+# Load configuration
+config = ConfigManager()
+
+
+def generate_tree(directory: str, max_depth: int | None = None) -> str:
+    """
+    Generate a directory tree structure by running the external ``tree`` command.
+
+    Parameters
+    ----------
+    directory : str
+        Directory path to analyze.
+    max_depth : int | None
+        Maximum depth of the tree, by default None (use the configured depth).
+
+    Returns
+    -------
+    str
+        String representation of the directory tree, or a "Failed to generate tree" message
+        if the ``tree`` command could not be run.
+    """
+    if max_depth is None:
+        max_depth = config.tree_max_depth
+
+    try:
+        result = subprocess.run(["tree", "-L", str(max_depth)], cwd=directory, capture_output=True, text=True)
+        return result.stdout
+    except (OSError, ValueError, subprocess.SubprocessError) as e:
+        # OSError: ``tree`` is not installed or ``directory`` does not exist;
+        # ValueError: invalid arguments or output that cannot be decoded as text.
+        return f"Failed to generate tree: {e}"
+
+
+def analyze_tree_and_suggest_patterns(tree_output: str) -> tuple[list[str], list[str]]:
+    """
+    Suggest include/exclude patterns for the analysis.
+
+    Parameters
+    ----------
+    tree_output : str
+        String representation of the directory tree. Currently unused: the suggestions
+        are a fixed set of defaults.
+
+    Returns
+    -------
+    tuple[list[str], list[str]]
+        List of include patterns and list of exclude patterns.
+    """
+    # Python sources, project metadata and Markdown documentation
+    include_patterns = [
+        "**/*.py",
+        "README.md",
+        "CHANGELOG.md",
+        "LICENSE",
+        "requirements.txt",
+        "pyproject.toml",
+        "setup.py",
+        "setup.cfg",
+        "MANIFEST.in",
+        "docs/**/*.md",
+    ]
+
+    # Compiled artifacts, media, tooling directories, front-end bundles, tests and notebooks
+    exclude_patterns = [
+        "**/*.pyc",
+        "**/__pycache__/**",
+        "**/*.so",
+        "**/*.pyd",
+        "**/*.dll",
+        "**/*.dylib",
+        "**/*.egg",
+        "**/*.whl",
+        "**/*.exe",
+        "**/*.png",
+        "**/*.jpg",
+        "**/*.jpeg",
+        "**/*.gif",
+        "**/*.ico",
+        "**/*.svg",
+        "**/*.mp4",
+        "**/*.mov",
+        "**/*.avi",
+        "**/*.mp3",
+        "**/*.wav",
+        "**/.git/**",
+        "**/.idea/**",
+        "**/.vscode/**",
+        "**/.env",
+        "**/.env.*",
+        "**/node_modules/**",
+        "**/venv/**",
+        "**/env/**",
+        "**/build/**",
+        "**/dist/**",
+        "**/.pytest_cache/**",
+        "**/.coverage",
+        "**/htmlcov/**",
+        "**/*.min.js",
+        "**/*.min.css",
+        "**/*.map",
+        "**/webpack.stats.json",
+        "**/ui/**/*.js",
+        "**/ui/**/*.css",
+        "**/ui/build/**",
+        "**/ui/dist/**",
+        "**/tests/**",
+        "**/test_*.py",
+        "**/*_test.py",
+        "**/*.ipynb",
+        "**/.ipynb_checkpoints/**",
+    ]
+
+    return include_patterns, exclude_patterns
+
+
+def smart_ingest(
+    directory: str,
+    max_file_size: int | None = None,
+    output_format: str | None = None,
+    max_depth: int | None = None,
+) -> dict[str, Any]:
+    """
+    Perform smart ingest analysis on the given directory and return the report.
+
+    Parameters
+    ----------
+    directory : str
+        Directory path to analyze.
+    max_file_size : int | None
+        Maximum file size to analyze, by default None (use the configured size).
+    output_format : str | None
+        Output format of the report, by default None (use the configured default format).
+    max_depth : int | None
+        Maximum depth of the directory tree, by default None (use the configured depth).
+
+    Returns
+    -------
+    dict[str, Any]
+        Report of the analysis. If the analysis fails, the dictionary contains an "error" entry
+        instead of "analysis_result".
+
+    Raises
+    ------
+    ValueError
+        If the output format is not supported.
+    """
+    if max_file_size is None:
+        max_file_size = config.max_file_size
+    if output_format is None:
+        output_format = config.default_format
+
+    if not config.validate_format(output_format):
+        raise ValueError(f"Unsupported output format: {output_format}")
+
+    # Step 1: Generate directory tree
+    print("Step 1: Generating directory tree...", end="\n\n")
+    tree_output = generate_tree(directory, max_depth)
+    print(tree_output)
+
+    # Save the tree output to a file
+    tree_file = config.get_output_path(config.get_output_file("tree"), "trees")
+    with open(tree_file, "w", encoding=config.file_encoding) as f:
+        f.write(tree_output)
+
+    # Step 2: Analyze the directory structure and suggest filter patterns
+    print("Step 2: Analyzing directory structure and suggesting filter patterns...")
+    include_patterns, exclude_patterns = analyze_tree_and_suggest_patterns(tree_output)
+
+    print("Suggested include patterns:", end="\n\n")
+    for pattern in include_patterns:
+        print(f" - {pattern}")
+
+    print("Suggested exclude patterns:", end="\n\n")
+    for pattern in exclude_patterns:
+        print(f" - {pattern}")
+
+    # Step 3: Execute file analysis
+    print("\nStep 3: Executing file analysis...")
+    try:
+        summary, tree, content = ingest(
+            source=directory,
+            max_file_size=max_file_size,
+            include_patterns=include_patterns,
+            exclude_patterns=exclude_patterns,
+            output=config.get_output_path(config.get_output_file(output_format), "reports"),
+        )
+
+        # Report (long content is truncated to the configured preview length)
+        report = {
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns,
+            },
+            "analysis_result": {
+                "summary": summary,
+                "tree": tree,
+                "content": (
+                    content[: config.content_preview_length] + "..."
+                    if len(content) > config.content_preview_length
+                    else content
+                ),
+            },
+        }
+
+        # Save the report to a JSON file
+        json_file = config.get_output_path(config.get_output_file("json"), "reports")
+        with open(json_file, "w", encoding=config.file_encoding) as f:
+            json.dump(report, f, ensure_ascii=False, indent=2)
+
+        return report
+
+    except Exception as e:
+        # Best effort: return what was gathered so far together with the error.
+        # The message prefix is Chinese for "An error occurred during analysis".
+        return {
+            "error": f"分析过程中出错: {e}",
+            "directory_tree": tree_output,
+            "suggested_patterns": {
+                "include": include_patterns,
+                "exclude": exclude_patterns,
+            },
+        }
+
+
+def main() -> None:
+    """Parse the command-line arguments, run the analysis and print where the output was written."""
+    # Parse command-line arguments
+    parser = argparse.ArgumentParser(description="Intelligent directory analysis tool")
+    parser.add_argument("--source", "-s", type=str, default=config.input_base_dir, help="Directory to analyze")
+    parser.add_argument(
+        "--source-type",
+        "-t",
+        type=str,
+        choices=config.supported_sources,
+        default=config.default_source,
+        help="Input source type",
+    )
+    parser.add_argument("--max-depth", "-d", type=int, default=config.tree_max_depth, help="Maximum depth of the tree")
+    parser.add_argument(
+        "--max-size", "-m", type=int, default=config.max_file_size, help="Maximum file size to analyze"
+    )
+    parser.add_argument("--output-dir", "-o", type=str, default=config.output_base_dir, help="Output base directory")
+    parser.add_argument(
+        "--format",
+        "-f",
+        type=str,
+        choices=config.supported_formats,
+        default=config.default_format,
+        help="Output format of the report",
+    )
+
+    args = parser.parse_args()
+
+    print(f"Start analyzing directory: {args.source}")
+    print("Configuration information:")
+    print(f"- Input source type: {args.source_type}")
+    print(f"- Directory tree depth: {args.max_depth}")
+    print(f"- Maximum file size: {args.max_size / 1024 / 1024:.2f}MB")
+    print(f"- Output base directory: {args.output_dir}")
+    print(f"- Output format: {args.format}")
+
+    # Perform smart ingest analysis
+    # NOTE(review): --source-type and --output-dir are only echoed above; nothing below consumes them.
+    result = smart_ingest(
+        directory=args.source,
+        max_file_size=args.max_size,
+        output_format=args.format,
+        max_depth=args.max_depth,
+    )
+
+    # smart_ingest reports a failed analysis through an "error" entry rather than an exception
+    if "error" in result:
+        print(f"\nAnalysis failed: {result['error']}")
+        return
+
+    print("\nAnalysis completed! Output file:")
+    print(f"1. Directory tree: {config.get_output_path(config.get_output_file('tree'), 'trees')}")
+    print(f"2. Analysis report: {config.get_output_path(config.get_output_file(args.format), 'reports')}")
+    print(f"3. JSON report: {config.get_output_path(config.get_output_file('json'), 'reports')}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/config_manager.py b/src/config_manager.py
new file mode 100644
index 00000000..1cf85592
--- /dev/null
+++ b/src/config_manager.py
@@ -0,0 +1,311 @@
+""" Configuration manager module for loading and managing configuration. """
+
+import os
+from typing import Any
+
+import yaml
+
+
+class ConfigManager:
+    """Configuration manager class for loading and managing configuration."""
+
+    def __init__(self, config_file: str = "config.yaml") -> None:
+        """
+        Initialize the configuration manager.
+
+        Parameters
+        ----------
+        config_file : str, optional
+            The configuration file path, by default "config.yaml".
+        """
+        self.config_file = config_file
+        self.config = self._load_config()
+        self._init_directories()
+
+    def _load_config(self) -> dict[str, Any]:
+        """
+        Load configuration from the config file.
+
+        Returns
+        -------
+        dict[str, Any]
+            The configuration dictionary, or an empty dictionary if the file cannot be
+            read or parsed, or does not contain a mapping.
+        """
+        try:
+            with open(self.config_file, encoding="utf-8") as f:
+                loaded = yaml.safe_load(f)
+        except (OSError, ValueError, yaml.YAMLError) as e:
+            print(f"Failed to load configuration file: {e}")
+            return {}
+        # An empty file loads as None and a scalar/list document is not a mapping;
+        # fall back to {} so the property getters can keep calling .get().
+        return loaded if isinstance(loaded, dict) else {}
+
+    def _init_directories(self) -> None:
+        """
+        Initialize directories based on the configuration.
+        """
+        # Output base directory first, then the reports, trees and temporary directories joined onto it
+        for directory in (self.output_base_dir, self.reports_dir, self.trees_dir, self.temp_dir):
+            os.makedirs(directory, exist_ok=True)
+
+    @property
+    def tree_max_depth(self) -> int:
+        """
+        Get the maximum depth of the tree.
+
+        Returns
+        -------
+        int
+            The maximum depth of the tree.
+        """
+        return self.config.get("tree", {}).get("max_depth", 4)
+
+    @property
+    def max_file_size(self) -> int:
+        """
+        Get the maximum file size.
+
+        Returns
+        -------
+        int
+            The maximum file size.
+        """
+        return self.config.get("file", {}).get("max_size", 10 * 1024 * 1024)
+
+    @property
+    def file_encoding(self) -> str:
+        """
+        Get the file encoding.
+
+        Returns
+        -------
+        str
+            The file encoding.
+        """
+        return self.config.get("file", {}).get("encoding", "utf-8")
+
+    # Input path related
+    @property
+    def input_base_dir(self) -> str:
+        """
+        Get the input base directory.
+
+        Returns
+        -------
+        str
+            The input base directory.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("base_dir", os.getcwd())
+
+    @property
+    def github_repo(self) -> str:
+        """
+        Get the default GitHub repository address.
+
+        Returns
+        -------
+        str
+            The default GitHub repository address.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("github", "")
+
+    @property
+    def gitlab_repo(self) -> str:
+        """
+        Get the default GitLab repository address.
+
+        Returns
+        -------
+        str
+            The default GitLab repository address.
+        """
+        return self.config.get("paths", {}).get("input", {}).get("gitlab", "")
+
+    @property
+    def output_base_dir(self) -> str:
+        """
+        Get the output base directory.
+
+        Returns
+        -------
+        str
+            The output base directory.
+        """
+        return self.config.get("paths", {}).get("output", {}).get("base_dir", "output")
+
+    @property
+    def reports_dir(self) -> str:
+        """
+        Get the reports directory.
+
+        Returns
+        -------
+        str
+            The reports directory.
+        """
+        reports = self.config.get("paths", {}).get("output", {}).get("reports", "reports")
+        return os.path.join(self.output_base_dir, reports)
+
+    @property
+    def trees_dir(self) -> str:
+        """
+        Get the trees directory.
+
+        Returns
+        -------
+        str
+            The trees directory.
+        """
+        trees = self.config.get("paths", {}).get("output", {}).get("trees", "trees")
+        return os.path.join(self.output_base_dir, trees)
+
+    @property
+    def temp_dir(self) -> str:
+        """
+        Get the temporary directory.
+
+        Returns
+        -------
+        str
+            The temporary directory.
+        """
+        temp = self.config.get("paths", {}).get("output", {}).get("temp", "temp")
+        return os.path.join(self.output_base_dir, temp)
+
+    @property
+    def supported_formats(self) -> list[str]:
+        """
+        Get the supported output formats.
+
+        Returns
+        -------
+        list[str]
+            The supported output formats.
+        """
+        return self.config.get("output", {}).get("formats", ["md"])
+
+    @property
+    def default_format(self) -> str:
+        """
+        Get the default output format.
+
+        Returns
+        -------
+        str
+            The default output format.
+        """
+        return self.config.get("output", {}).get("default_format", "md")
+
+    def get_output_file(self, format_type: str) -> str:
+        """
+        Get the output file name based on the format type.
+
+        Parameters
+        ----------
+        format_type : str
+            The format type.
+
+        Returns
+        -------
+        str
+            The output file name.
+        """
+        files = self.config.get("output", {}).get("files", {})
+        return files.get(format_type, f"analysis_result.{format_type}")
+
+    def get_output_path(self, filename: str, output_type: str = "reports") -> str:
+        """
+        Get the full output path based on the filename and output type.
+
+        Parameters
+        ----------
+        filename : str
+            The filename to be used.
+        output_type : str, optional
+            The type of output (reports, trees, temp), by default "reports".
+
+        Returns
+        -------
+        str
+            The full output path.
+        """
+        if output_type == "reports":
+            base_dir = self.reports_dir
+        elif output_type == "trees":
+            base_dir = self.trees_dir
+        elif output_type == "temp":
+            base_dir = self.temp_dir
+        else:
+            base_dir = self.output_base_dir
+
+        return os.path.join(base_dir, filename)
+
+    @property
+    def content_preview_length(self) -> int:
+        """
+        Get the content preview length.
+
+        Returns
+        -------
+        int
+            The content preview length.
+        """
+        return self.config.get("content", {}).get("preview_length", 1000)
+
+    @property
+    def supported_sources(self) -> list[str]:
+        """
+        Get the supported input sources.
+
+        Returns
+        -------
+        list[str]
+            The supported input sources.
+        """
+        return self.config.get("input", {}).get("supported_sources", ["local"])
+
+    @property
+    def default_source(self) -> str:
+        """
+        Get the default input source type.
+
+        Returns
+        -------
+        str
+            The default input source type.
+        """
+        return self.config.get("input", {}).get("default_source", "local")
+
+    def validate_format(self, format_type: str) -> bool:
+        """
+        Validate if the output format is supported.
+
+        Parameters
+        ----------
+        format_type : str
+            The output format type.
+
+        Returns
+        -------
+        bool
+            True if the format is supported, False otherwise.
+        """
+        return format_type in self.supported_formats
+
+    def validate_source(self, source_type: str) -> bool:
+        """
+        Validate if the input source is supported.
+
+        Parameters
+        ----------
+        source_type : str
+            The input source type.
+
+        Returns
+        -------
+        bool
+            True if the source is supported, False otherwise.
+        """
+        return source_type in self.supported_sources