32 | 32 | META_FILE_FAILED, MAVEN_METADATA_TEMPLATE, |
33 | 33 | ARCHETYPE_CATALOG_TEMPLATE, ARCHETYPE_CATALOG_FILENAME, |
34 | 34 | PACKAGE_TYPE_MAVEN) |
35 | | -from typing import Dict, List, Tuple |
| 35 | +from typing import Dict, List, Tuple, Union |
36 | 36 | from jinja2 import Template |
37 | 37 | from datetime import datetime |
38 | 38 | from zipfile import ZipFile, BadZipFile |
39 | 39 | from tempfile import mkdtemp |
| 40 | +from shutil import rmtree, copy2 |
40 | 41 | from defusedxml import ElementTree |
41 | 42 |
42 | 43 | import os |
@@ -261,7 +262,7 @@ def __gen_digest_file(hash_file_path, meta_file_path: str, hashtype: HashType) - |
261 | 262 |
262 | 263 |
263 | 264 | def handle_maven_uploading( |
264 | | - repo: str, |
| 265 | + repos: Union[str, List[str]], |
265 | 266 | prod_key: str, |
266 | 267 | ignore_patterns=None, |
267 | 268 | root="maven-repository", |
@@ -294,8 +295,10 @@ def handle_maven_uploading( |
294 | 295 | """ |
295 | 296 | if targets is None: |
296 | 297 | targets = [] |
297 | | - # 1. extract tarball |
298 | | - tmp_root = _extract_tarball(repo, prod_key, dir__=dir_) |
| 298 | + if isinstance(repos, str): |
| 299 | + repos = [repos] |
| 300 | + # 1. extract tarballs |
| 301 | + tmp_root = _extract_tarballs(repos, root, prod_key, dir__=dir_) |
299 | 302 |
300 | 303 | # 2. scan for paths and filter out the ignored paths, |
301 | 304 | # and also collect poms for later metadata generation |
@@ -673,6 +676,135 @@ def _extract_tarball(repo: str, prefix="", dir__=None) -> str: |
673 | 676 | sys.exit(1) |
674 | 677 |
675 | 678 |
| 679 | +def _extract_tarballs(repos: List[str], root: str, prefix="", dir__=None) -> str: |
| 680 | + """ Extract multiple zip archives to a temporary directory. |
| 681 | + * repos is the list of repo paths to extract |
| 682 | + * root is a prefix in the tarball to identify which path is |
| 683 | + the beginning of the maven GAV path |
| 684 | + * prefix is the prefix for the temporary directory names |
| 685 | + * dir__ is the directory where temporary directories will be created. |
| 686 | + |
| 687 | + Returns the path to the merged temporary directory containing all extracted files. |
| 688 | + """ |
| 689 | + # Create final merge directory |
| 690 | + final_tmp_root = mkdtemp(prefix=f"charon-{prefix}-final-", dir=dir__) |
| 691 | + |
| 692 | + total_copied = 0 |
| 693 | + total_overwritten = 0 |
| 694 | + total_processed = 0 |
| 695 | + |
| 696 | + # Collect all extracted directories first |
| 697 | + extracted_dirs = [] |
| 698 | + |
| 699 | + for repo in repos: |
| 700 | + if os.path.exists(repo): |
| 701 | + try: |
| 702 | + logger.info("Extracting tarball %s", repo) |
| 703 | + repo_zip = ZipFile(repo) |
| 704 | + tmp_root = mkdtemp(prefix=f"charon-{prefix}-", dir=dir__) |
| 705 | + extract_zip_all(repo_zip, tmp_root) |
| 706 | + extracted_dirs.append(tmp_root) |
| 707 | + |
| 708 | + except BadZipFile as e: |
| 709 | + logger.error("Tarball extraction error: %s", e) |
| 710 | + sys.exit(1) |
| 711 | + else: |
| 712 | + logger.error("Error: archive %s does not exist", repo) |
| 713 | + sys.exit(1) |
| 714 | + |
| 715 | + # Merge all extracted directories |
| 716 | + if extracted_dirs: |
| 717 | + # Collect top-level directory names from all repos to build the merged directory name |
| 718 | + top_level_merged_name_dirs = [] |
| 719 | + for extracted_dir in extracted_dirs: |
| 720 | + for item in os.listdir(extracted_dir): |
| 721 | + item_path = os.path.join(extracted_dir, item) |
| 722 | + # Check that the root subdirectory (e.g. maven-repository) exists |
| 723 | + maven_repo_path = os.path.join(item_path, root) |
| 724 | + if os.path.isdir(item_path) and os.path.exists(maven_repo_path): |
| 725 | + top_level_merged_name_dirs.append(item) |
| 726 | + break |
| 727 | + |
| 728 | + # Create merged directory name |
| 729 | + merged_dir_name = ( |
| 730 | + "_".join(top_level_merged_name_dirs) if top_level_merged_name_dirs else "merged" |
| 731 | + ) |
| 732 | + merged_dest_dir = os.path.join(final_tmp_root, merged_dir_name) |
| 733 | + |
| 734 | + # Merge content from all extracted directories |
| 735 | + for extracted_dir in extracted_dirs: |
| 736 | + copied, overwritten, processed = _merge_directories_with_rename( |
| 737 | + extracted_dir, merged_dest_dir, root |
| 738 | + ) |
| 739 | + total_copied += copied |
| 740 | + total_overwritten += overwritten |
| 741 | + total_processed += processed |
| 742 | + |
| 743 | + # Clean up temporary extraction directory |
| 744 | + rmtree(extracted_dir) |
| 745 | + |
| 746 | + logger.info( |
| 747 | + "All zips merged! Total copied: %s, Total overwritten: %s, Total processed: %s", |
| 748 | + total_copied, |
| 749 | + total_overwritten, |
| 750 | + total_processed, |
| 751 | + ) |
| 752 | + return final_tmp_root |
| 753 | + |
| 754 | + |
| 755 | +def _merge_directories_with_rename(src_dir: str, dest_dir: str, root: str): |
| 756 | + """ Recursively copy files from src_dir to dest_dir, overwriting existing files. |
| 757 | + * src_dir is the source directory to copy from |
| 758 | + * dest_dir is the destination directory to copy to. |
| 759 | + * root is the subdirectory used to identify the content root inside src_dir. |
| 760 | + Returns a tuple of (copied_count, overwritten_count, processed_count) |
| 761 | + """ |
| 762 | + copied_count = 0 |
| 763 | + overwritten_count = 0 |
| 764 | + processed_count = 0 |
| 765 | + |
| 766 | + # Find the actual content directory |
| 767 | + content_root = src_dir |
| 768 | + for item in os.listdir(src_dir): |
| 769 | + item_path = os.path.join(src_dir, item) |
| 770 | + # Check the root maven-repository subdirectory existence |
| 771 | + maven_repo_path = os.path.join(item_path, root) |
| 772 | + if os.path.isdir(item_path) and os.path.exists(maven_repo_path): |
| 773 | + content_root = item_path |
| 774 | + break |
| 775 | + |
| 776 | + # pylint: disable=unused-variable |
| 777 | + for root_dir, dirs, files in os.walk(content_root): |
| 778 | + # Calculate relative path from content root |
| 779 | + rel_path = os.path.relpath(root_dir, content_root) |
| 780 | + dest_root = os.path.join(dest_dir, rel_path) if rel_path != '.' else dest_dir |
| 781 | + |
| 782 | + # Create destination directory if it doesn't exist |
| 783 | + os.makedirs(dest_root, exist_ok=True) |
| 784 | + |
| 785 | + # Copy all files, overwriting existing ones |
| 786 | + for file in files: |
| 787 | + src_file = os.path.join(root_dir, file) |
| 788 | + dest_file = os.path.join(dest_root, file) |
| 789 | + if os.path.exists(dest_file): |
| 790 | + overwritten_count += 1 |
| 791 | + logger.debug("Overwritten: %s -> %s", src_file, dest_file) |
| 792 | + else: |
| 793 | + copied_count += 1 |
| 794 | + logger.debug("Copied: %s -> %s", src_file, dest_file) |
| 795 | + |
| 796 | + processed_count += 1 |
| 797 | + copy2(src_file, dest_file) |
| 798 | + |
| 799 | + logger.info( |
| 800 | + "One zip merged! Files copied: %s, Files overwritten: %s, Total files processed: %s", |
| 801 | + copied_count, |
| 802 | + overwritten_count, |
| 803 | + processed_count, |
| 804 | + ) |
| 805 | + return copied_count, overwritten_count, processed_count |
| 806 | + |
| 807 | + |
676 | 808 | def _scan_paths(files_root: str, ignore_patterns: List[str], |
677 | 809 | root: str) -> Tuple[str, List[str], List[str], List[str]]: |
678 | 810 | # 2. scan for paths and filter out the ignored paths, |
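
For reviewers, a short illustrative sketch of what the new extraction path does. The archive paths and prefix below are hypothetical, the call assumes it runs inside the module that defines _extract_tarballs, and each zip is expected to contain a single top-level directory with a maven-repository/ tree beneath it, as the merging code checks for:

# Two hypothetical build archives whose maven-repository trees
# should be published together.
archives = ["/tmp/product-a-1.0.zip", "/tmp/product-b-1.0.zip"]

# Each zip is extracted into its own temporary directory, then merged
# under one root whose top-level directory is named "<dirA>_<dirB>".
merged_root = _extract_tarballs(archives, "maven-repository", prefix="my-product")

# handle_maven_uploading() now accepts either a single archive path
# (the old behaviour) or a list; a bare string is normalised to a
# one-element list before _extract_tarballs() is called.

When the same relative path exists in more than one archive, the archive that appears later in the repos list wins: _merge_directories_with_rename() copies with overwrite and logs each collision at debug level.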