Skip to content

Commit

Permalink
Normalize URL paths: convert /.//p, /..//p, and //p to p
Browse files Browse the repository at this point in the history
  • Loading branch information
theskim committed Nov 27, 2024
1 parent ca4b5dc commit 99f44a8
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 5 deletions.
54 changes: 53 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1757,6 +1757,39 @@ impl Url {
let old_after_path_pos = to_u32(self.serialization.len()).unwrap();
let cannot_be_a_base = self.cannot_be_a_base();
let scheme_type = SchemeType::from(self.scheme());
let mut path_empty = false;

// Check ':' and then see if the next character is '/'
let mut has_host = if let Some(index) = self.serialization.find(":") {
if self.serialization.len() > index + 1
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
{
let rest = &self.serialization[(index + ":/".len())..];
let host_part = rest.split('/').next().unwrap_or("");
path_empty = rest.is_empty();
!host_part.is_empty() && !host_part.contains('@')
} else {
false
}
} else {
false

Check warning on line 1775 in url/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

url/src/lib.rs#L1775

Added line #L1775 was not covered by tests
};

// Ensure the path length is greater than 1 to account
// for cases where "/." was already added to the serialization.
// If we set path, then we already checked the other two conditions:
// https://url.spec.whatwg.org/#url-serializing
// 1. The host is null
// 2. The first segment of the URL's path is an empty string
if path.len() > 1 {
if let Some(index) = self.serialization.find(":") {
let removal_start = index + ":".len();
if self.serialization[removal_start..].starts_with("/.") {
self.path_start -= "/.".len() as u32;
}
}
}

self.serialization.truncate(self.path_start as usize);
self.mutate(|parser| {
if cannot_be_a_base {
Expand All @@ -1766,14 +1799,33 @@ impl Url {
}
parser.parse_cannot_be_a_base_path(parser::Input::new_no_trim(path));
} else {
let mut has_host = true; // FIXME
parser.parse_path_start(
scheme_type,
&mut has_host,
parser::Input::new_no_trim(path),
);
}
});

// Handle cases where normalization applies across both the serialization and the path:
// insert "/." immediately after the scheme (right after the ":").
// This is done only if three conditions are met:
// https://url.spec.whatwg.org/#url-serializing
// 1. The host is null
// 2. The URL's path length is greater than 1
// 3. The first segment of the URL's path is an empty string
if !has_host && path.len() > 1 && path_empty {
if let Some(index) = self.serialization.find(":") {
if self.serialization.len() > index + 2
&& self.serialization.as_bytes().get(index + 1) == Some(&b'/')
&& self.serialization.as_bytes().get(index + 2) == Some(&b'/')
{
self.serialization.insert_str(index + ":".len(), "/.");
self.path_start += "/.".len() as u32;
}
}
}

self.restore_after_path(old_after_path_pos, &after_path);
}

Expand Down
4 changes: 0 additions & 4 deletions url/tests/expected_failures.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,3 @@
<file://monkey/> set pathname to <\\\\>
<file:///unicorn> set pathname to <//\\/>
<file:///unicorn> set pathname to <//monkey/..//>
<non-spec:/> set pathname to </.//p>
<non-spec:/> set pathname to </..//p>
<non-spec:/> set pathname to <//p>
<non-spec:/.//> set pathname to <p>

0 comments on commit 99f44a8

Please sign in to comment.