Skip to content

Commit 5bfd150

Browse files
committed
make all baseline tests pass
Done by Sonnet 4.5
1 parent e871c55 commit 5bfd150

File tree

6 files changed

+194
-138
lines changed

6 files changed

+194
-138
lines changed

gix-url/src/lib.rs

Lines changed: 38 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -380,6 +380,9 @@ impl Url {
380380

381381
out.write_all(self.scheme.as_str().as_bytes())?;
382382
out.write_all(b"://")?;
383+
384+
let needs_brackets = self.port.is_some() && self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
385+
383386
match (&self.user, &self.host) {
384387
(Some(user), Some(host)) => {
385388
out.write_all(percent_encode(user).as_bytes())?;
@@ -388,10 +391,22 @@ impl Url {
388391
out.write_all(percent_encode(password).as_bytes())?;
389392
}
390393
out.write_all(b"@")?;
394+
if needs_brackets {
395+
out.write_all(b"[")?;
396+
}
391397
out.write_all(host.as_bytes())?;
398+
if needs_brackets {
399+
out.write_all(b"]")?;
400+
}
392401
}
393402
(None, Some(host)) => {
403+
if needs_brackets {
404+
out.write_all(b"[")?;
405+
}
394406
out.write_all(host.as_bytes())?;
407+
if needs_brackets {
408+
out.write_all(b"]")?;
409+
}
395410
}
396411
(None, None) => {}
397412
(Some(_user), None) => {
@@ -403,11 +418,22 @@ impl Url {
403418
if let Some(port) = &self.port {
404419
write!(out, ":{port}")?;
405420
}
421+
// For SSH and Git URLs, add leading '/' if path doesn't start with '/'
422+
// This handles paths like "~repo" which serialize as "/~repo" in URL form
423+
if matches!(self.scheme, Scheme::Ssh | Scheme::Git) && !self.path.starts_with(b"/") {
424+
out.write_all(b"/")?;
425+
}
406426
out.write_all(&self.path)?;
407427
Ok(())
408428
}
409429

430+
/// Textual heuristic used by the serializers to decide whether `host` gets wrapped in `[...]`.
///
/// Returns `true` when `host` contains a `:` and does not already start with `[`.
/// NOTE(review): this does not validate IPv6 syntax — any host containing a `:`
/// (for example one that kept a trailing colon) matches as well; confirm callers expect that.
fn is_ipv6(host: &str) -> bool {
431+
host.contains(':') && !host.starts_with('[')
432+
}
433+
410434
fn write_alternative_form_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
435+
let needs_brackets = self.host.as_ref().is_some_and(|h| Self::is_ipv6(h));
436+
411437
match (&self.user, &self.host) {
412438
(Some(user), Some(host)) => {
413439
out.write_all(user.as_bytes())?;
@@ -416,10 +442,22 @@ impl Url {
416442
"BUG: cannot serialize password in alternative form"
417443
);
418444
out.write_all(b"@")?;
445+
if needs_brackets {
446+
out.write_all(b"[")?;
447+
}
419448
out.write_all(host.as_bytes())?;
449+
if needs_brackets {
450+
out.write_all(b"]")?;
451+
}
420452
}
421453
(None, Some(host)) => {
454+
if needs_brackets {
455+
out.write_all(b"[")?;
456+
}
422457
out.write_all(host.as_bytes())?;
458+
if needs_brackets {
459+
out.write_all(b"]")?;
460+
}
423461
}
424462
(None, None) => {}
425463
(Some(_user), None) => {

gix-url/src/parse.rs

Lines changed: 85 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,23 @@ pub(crate) fn find_scheme(input: &BStr) -> InputScheme {
7070
return InputScheme::Url { protocol_end };
7171
}
7272

73-
if let Some(colon) = input.find_byte(b':') {
73+
// Find colon, but skip over IPv6 brackets if present
74+
let colon = if input.starts_with(b"[") {
75+
// IPv6 address, find the closing bracket first
76+
if let Some(bracket_end) = input.find_byte(b']') {
77+
// Look for colon after the bracket
78+
input[bracket_end + 1..]
79+
.find_byte(b':')
80+
.map(|pos| bracket_end + 1 + pos)
81+
} else {
82+
// No closing bracket, treat as regular search
83+
input.find_byte(b':')
84+
}
85+
} else {
86+
input.find_byte(b':')
87+
};
88+
89+
if let Some(colon) = colon {
7490
// allow user to select files containing a `:` by passing them as absolute or relative path
7591
// this is behavior explicitly mentioned by the scp and git manuals
7692
let explicitly_local = &input[..colon].contains(&b'/');
@@ -111,20 +127,57 @@ pub(crate) fn url(input: &BStr, protocol_end: usize) -> Result<crate::Url, Error
111127
// Normalize empty path to "/" for http/https URLs only
112128
let path = if url.path.is_empty() && matches!(scheme, Scheme::Http | Scheme::Https) {
113129
"/".into()
130+
} else if matches!(scheme, Scheme::Ssh | Scheme::Git) && url.path.starts_with("/~") {
131+
// For SSH and Git protocols, strip the leading '/' from paths starting with "/~"
132+
// e.g., "ssh://host/~repo" -> path is "~repo", not "/~repo"
133+
url.path[1..].into()
114134
} else {
115135
url.path.into()
116136
};
117137

138+
let user = url_user(&url, UrlKind::Url)?;
139+
let password = url
140+
.password
141+
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
142+
.transpose()?;
143+
let port = url.port;
144+
145+
// For SSH URLs, strip brackets from IPv6 addresses and drop the trailing colon of an empty port
146+
let host = if scheme == Scheme::Ssh {
147+
url.host.map(|mut h| {
148+
// Check if we have bracketed IPv6 with trailing colon: "[::1]:"
149+
if h.starts_with('[') {
150+
if h.ends_with("]:") {
151+
// "[::1]:" -> "::1" (strip brackets and colon)
152+
h = h[1..h.len() - 2].to_string();
153+
} else if h.ends_with(']') {
154+
// "[::1]" -> "::1" (just strip brackets)
155+
h = h[1..h.len() - 1].to_string();
156+
}
157+
} else {
158+
// For non-bracketed hosts, only strip trailing colon if it's not part of IPv6
159+
// Count colons: if there's only one colon and it's at the end, strip it
160+
// Otherwise (multiple colons or colon not at end), keep it
161+
let colon_count = h.chars().filter(|&c| c == ':').count();
162+
if colon_count == 1 && h.ends_with(':') {
163+
// Regular host with empty port "host:" -> "host"
164+
h = h[..h.len() - 1].to_string();
165+
}
166+
// For bare IPv6 with trailing colon "::1:", keep it as is (colon_count > 1)
167+
}
168+
h
169+
})
170+
} else {
171+
url.host
172+
};
173+
118174
Ok(crate::Url {
119175
serialize_alternative_form: false,
120176
scheme,
121-
user: url_user(&url, UrlKind::Url)?,
122-
password: url
123-
.password
124-
.map(|s| percent_decoded_utf8(s, UrlKind::Url))
125-
.transpose()?,
126-
host: url.host,
127-
port: url.port,
177+
user,
178+
password,
179+
host,
180+
port,
128181
path,
129182
})
130183
}
@@ -166,16 +219,33 @@ pub(crate) fn scp(input: &BStr, colon: usize) -> Result<crate::Url, Error> {
166219
source,
167220
})?;
168221

222+
// For SCP-like SSH URLs, strip leading '/' from paths starting with '/~'
223+
// e.g., "user@host:/~repo" -> path is "~repo", not "/~repo"
224+
let path = if path.starts_with("/~") { &path[1..] } else { path };
225+
226+
let user = url_user(&url, UrlKind::Scp)?;
227+
let password = url
228+
.password
229+
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
230+
.transpose()?;
231+
let port = url.port;
232+
233+
// For SCP-like SSH URLs, strip brackets from IPv6 addresses
234+
let host = url.host.map(|h| {
235+
if h.starts_with('[') && h.ends_with(']') {
236+
h[1..h.len() - 1].to_string()
237+
} else {
238+
h
239+
}
240+
});
241+
169242
Ok(crate::Url {
170243
serialize_alternative_form: true,
171244
scheme: Scheme::from(url.scheme.as_str()),
172-
user: url_user(&url, UrlKind::Scp)?,
173-
password: url
174-
.password
175-
.map(|s| percent_decoded_utf8(s, UrlKind::Scp))
176-
.transpose()?,
177-
host: url.host,
178-
port: url.port,
245+
user,
246+
password,
247+
host,
248+
port,
179249
path: path.into(),
180250
})
181251
}

gix-url/src/simple_url.rs

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -115,18 +115,25 @@ impl<'a> ParsedUrl<'a> {
115115
// Handle IPv6 addresses: [::1] or [::1]:port
116116
if host_port.starts_with('[') {
117117
if let Some(bracket_end) = host_port.find(']') {
118-
// IPv6 addresses are case-insensitive, normalize to lowercase
119-
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
120118
let remaining = &host_port[bracket_end + 1..];
121119

122120
if remaining.is_empty() {
121+
// IPv6 addresses are case-insensitive, normalize to lowercase
122+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
123123
return Ok((host, None));
124124
} else if let Some(port_str) = remaining.strip_prefix(':') {
125+
if port_str.is_empty() {
126+
// Empty port like "[::1]:" - preserve the trailing colon for Git compatibility
127+
let host = Some(host_port.to_ascii_lowercase());
128+
return Ok((host, None));
129+
}
125130
let port = port_str.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
126131
// Validate port is in valid range (1-65535, port 0 is invalid)
127132
if port == 0 {
128133
return Err(UrlParseError::InvalidPort);
129134
}
135+
// IPv6 addresses are case-insensitive, normalize to lowercase
136+
let host = Some(host_port[..=bracket_end].to_ascii_lowercase());
130137
return Ok((host, Some(port)));
131138
} else {
132139
return Err(UrlParseError::InvalidDomainCharacter);
@@ -137,27 +144,41 @@ impl<'a> ParsedUrl<'a> {
137144
}
138145

139146
// Handle regular host:port
140-
// Use rfind to handle IPv6 addresses without brackets (edge case)
147+
// Use rfind to find the last colon
141148
if let Some(colon_pos) = host_port.rfind(':') {
149+
let before_last_colon = &host_port[..colon_pos];
150+
let after_last_colon = &host_port[colon_pos + 1..];
151+
142152
// Check if this looks like a port (all digits after colon)
143-
let potential_port = &host_port[colon_pos + 1..];
144-
if potential_port.is_empty() {
145-
// Empty port like "host:" - strip the trailing colon
146-
let host_str = &host_port[..colon_pos];
147-
return Ok((Some(Self::normalize_hostname(host_str)?), None));
148-
} else if potential_port.chars().all(|c| c.is_ascii_digit()) {
149-
let host_str = &host_port[..colon_pos];
150-
let host = Self::normalize_hostname(host_str)?;
151-
let port = potential_port.parse::<u16>().map_err(|_| UrlParseError::InvalidPort)?;
152-
// Validate port is in valid range (1-65535, port 0 is invalid)
153-
if port == 0 {
154-
return Err(UrlParseError::InvalidPort);
153+
// But avoid treating IPv6 addresses as host:port
154+
// IPv6 addresses have colons in the part before the last colon (e.g., in "::1" the part before the last ":" is ":")
155+
let has_colon_before_last = before_last_colon.contains(':');
156+
let is_all_digits_after =
157+
!after_last_colon.is_empty() && after_last_colon.chars().all(|c| c.is_ascii_digit());
158+
159+
// Treat as port separator only if:
160+
// 1. There's no colon before the last colon (normal host:port)
161+
// 2. AND the part after it is either empty ("host:" with trailing colon) or all digits (a port)
162+
if !has_colon_before_last {
163+
if after_last_colon.is_empty() {
164+
// Empty port like "host:" - store host with trailing colon
165+
// This is needed for Git compatibility where "host:" != "host"
166+
return Ok((Some(Self::normalize_hostname(host_port)?), None));
167+
} else if is_all_digits_after {
168+
let host = Self::normalize_hostname(before_last_colon)?;
169+
let port = after_last_colon
170+
.parse::<u16>()
171+
.map_err(|_| UrlParseError::InvalidPort)?;
172+
// Validate port is in valid range (1-65535, port 0 is invalid)
173+
if port == 0 {
174+
return Err(UrlParseError::InvalidPort);
175+
}
176+
return Ok((Some(host), Some(port)));
155177
}
156-
return Ok((Some(host), Some(port)));
157178
}
158179
}
159180

160-
// No port, just host
181+
// No port, just host (including bare IPv6 addresses)
161182
Ok((Some(Self::normalize_hostname(host_port)?), None))
162183
}
163184

0 commit comments

Comments
 (0)