Skip to content

Commit

Permalink
Fix mutating path of URL without authority (idempotency, empty path s…
Browse files Browse the repository at this point in the history
…egments)
  • Loading branch information
theskim committed Dec 17, 2024
1 parent f8b2d5d commit cff3eba
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 5 deletions.
9 changes: 8 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2129,7 +2129,7 @@ impl Url {
} else {
self.host_end
};
let suffix = self.slice(old_suffix_pos..).to_owned();
let mut suffix = self.slice(old_suffix_pos..).to_owned();
self.serialization.truncate(self.host_start as usize);
if !self.has_authority() {
debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
Expand All @@ -2143,6 +2143,13 @@ impl Url {
self.host_end = to_u32(self.serialization.len()).unwrap();
self.host = host.into();

// Adjust serialization to switch from host to empty segment
if suffix.starts_with("/.//") {
suffix.drain(.."/.".len());
// pathname should be "//p" not "p" given that the first segment was empty
self.path_start -= "/.".len() as u32;
}

if let Some(new_port) = opt_new_port {
self.port = new_port;
if let Some(port) = new_port {
Expand Down
42 changes: 40 additions & 2 deletions url/src/path_segments.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ impl<'a> PathSegmentsMut<'a> {
I::Item: AsRef<str>,
{
let scheme_type = SchemeType::from(self.url.scheme());
let path_start = self.url.path_start as usize;
let mut path_start = self.url.path_start as usize;
self.url.mutate(|parser| {
parser.context = parser::Context::PathSegmentSetter;
for segment in segments {
Expand All @@ -253,7 +253,44 @@ impl<'a> PathSegmentsMut<'a> {
{
parser.serialization.push('/');
}
let mut has_host = true; // FIXME account for this?

let mut path_empty = false;

// Check ':' and then see if the next character is '/'
let mut has_host = if let Some(index) = parser.serialization.find(":") {
if parser.serialization.len() > index + 1
&& parser.serialization.as_bytes().get(index + 1) == Some(&b'/')
{
let rest = &parser.serialization[(index + ":/".len())..];
let host_part = rest.split('/').next().unwrap_or("");
path_empty = rest.is_empty();
!host_part.is_empty() && !host_part.contains('@')
} else {
false

Check warning on line 269 in url/src/path_segments.rs

View check run for this annotation

Codecov / codecov/patch

url/src/path_segments.rs#L269

Added line #L269 was not covered by tests
}
} else {
false

Check warning on line 272 in url/src/path_segments.rs

View check run for this annotation

Codecov / codecov/patch

url/src/path_segments.rs#L272

Added line #L272 was not covered by tests
};

// For cases where normalization is applied across both the serialization and the path.
// Append "/." immediately after the scheme (up to ":")
// This is done if three conditions are met.
// https://url.spec.whatwg.org/#url-serializing
// 1. The host is null
// 2. The url's path length is greater than 1
// 3. the first segment of the URL's path is an empty string
if !has_host && segment.len() > 1 && path_empty {
if let Some(index) = parser.serialization.find(":") {
if parser.serialization.len() == index + 2
&& parser.serialization.as_bytes().get(index + 1) == Some(&b'/')
{
// Append an extra '/' to ensure that "/./path" becomes "/.//path"
parser.serialization.insert_str(index + ":".len(), "/./");
path_start += "/.".len();
}
}
}

parser.parse_path(
scheme_type,
&mut has_host,
Expand All @@ -262,6 +299,7 @@ impl<'a> PathSegmentsMut<'a> {
);
}
});
self.url.path_start = path_start as u32;
self
}
}
2 changes: 0 additions & 2 deletions url/tests/expected_failures.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,6 @@
<file:/.//p>
<http://example.net/path> set hostname to <example.com:8080>
<http://example.net:8080/path> set hostname to <example.com:>
<non-spec:/.//p> set hostname to <h>
<non-spec:/.//p> set hostname to <>
<foo:///some/path> set pathname to <>
<file:///var/log/system.log> set href to <http://0300.168.0xF0>
<file://monkey/> set pathname to <\\\\>
Expand Down
43 changes: 43 additions & 0 deletions url/tests/unit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1419,3 +1419,46 @@ fn test_fuzzing_uri_failures() {
assert_eq!(url.as_str(), "web+demo:////.dummy.path");
url.check_invariants().unwrap();
}

#[test]
fn test_can_be_a_base_with_set_path() {
use url::quirks;
let mut url = Url::parse("web+demo:/").unwrap();
assert!(!url.cannot_be_a_base());

url.set_path("//not-a-host");
assert_eq!(url.path(), "//not-a-host");

let segments: Vec<_> = url
.path_segments()
.expect("should have path segments")
.collect();

assert_eq!(segments, vec!["", "not-a-host"]);

assert_eq!(url.as_str(), "web+demo:/.//not-a-host");
quirks::set_hostname(&mut url, "test").unwrap();
assert_eq!(url.as_str(), "web+demo://test//not-a-host");
quirks::set_hostname(&mut url, "").unwrap();
assert_eq!(url.as_str(), "web+demo:////not-a-host");
}

#[test]
fn test_can_be_a_base_with_path_segments_mut() {
let mut url = Url::parse("web+demo:/").unwrap();
assert!(!url.cannot_be_a_base());

url.path_segments_mut()
.expect("should have path segments")
.push("")
.push("not-a-host");

assert_eq!(url.as_str(), "web+demo:/.//not-a-host");
assert_eq!(url.path(), "//not-a-host");

let segments: Vec<_> = url
.path_segments()
.expect("should have path segments")
.collect();
assert_eq!(segments, vec!["", "not-a-host"]);
}

0 comments on commit cff3eba

Please sign in to comment.