@@ -60,7 +60,8 @@ def generate_s3_file_path(
6060 """Generate S3 file path with proper naming convention.
6161
6262 The file path is formatted as:
63- [<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<branch>/<commit-ID>/<exclude&include hash>.txt
63+ [<S3_DIRECTORY_PREFIX>/]ingest/<hostname>/<repo-owner>/<repo-name>/<commit-ID>/
64+ <exclude&include hash>/<repo-owner>-<repo-name>.txt
6465
6566 If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.
6667 The commit-ID is always included in the URL.
@@ -98,20 +99,13 @@ def generate_s3_file_path(
9899 logger .error (msg )
99100 raise ValueError (msg )
100101
101- # Extract source from URL or default to "unknown"
102- git_source = {
103- "github.com" : "github" ,
104- "gitlab.com" : "gitlab" ,
105- "bitbucket.org" : "bitbucket" ,
106- }.get (hostname , "unknown" )
107-
108102 # Create hash of exclude/include patterns for uniqueness
109103 patterns_str = f"include:{ sorted (include_patterns ) if include_patterns else []} "
110104 patterns_str += f"exclude:{ sorted (ignore_patterns )} "
111105 patterns_hash = hashlib .sha256 (patterns_str .encode ()).hexdigest ()[:16 ]
112106
113- # Build the base path
114- base_path = f"ingest/{ git_source } /{ user_name } /{ repo_name } /{ commit } /{ patterns_hash } .txt"
107+ # Build the base path using hostname directly
108+ base_path = f"ingest/{ hostname } /{ user_name } /{ repo_name } /{ commit } /{ patterns_hash } / { user_name } - { repo_name } .txt"
115109
116110 # Check for S3_DIRECTORY_PREFIX environment variable
117111 s3_directory_prefix = os .getenv ("S3_DIRECTORY_PREFIX" )
0 commit comments