Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional percent-encode sets #837

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions percent_encoding/src/ascii_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,125 @@ pub const CONTROLS: &AsciiSet = &AsciiSet {
],
};

/// The [fragment percent-encode set].
///
/// Starts from [`CONTROLS`] (the C0 control percent-encode set) and additionally
/// contains U+0020 SPACE, U+0022 (`"`), U+003C (`<`), U+003E (`>`), and
/// U+0060 GRAVE ACCENT.
///
/// [fragment percent-encode set]: https://url.spec.whatwg.org/#fragment-percent-encode-set
pub const FRAGMENT: &AsciiSet = &CONTROLS
    .add(b' ') // U+0020 SPACE
    .add(b'"') // U+0022 QUOTATION MARK
    .add(b'<') // U+003C LESS-THAN SIGN
    .add(b'>') // U+003E GREATER-THAN SIGN
    .add(b'`'); // U+0060 GRAVE ACCENT

/// The [query percent-encode set].
///
/// Starts from [`CONTROLS`] (the C0 control percent-encode set) and additionally
/// contains U+0020 SPACE, U+0022 (`"`), U+0023 (`#`), U+003C (`<`), and
/// U+003E (`>`).
///
/// [query percent-encode set]: https://url.spec.whatwg.org/#query-percent-encode-set
pub const QUERY: &AsciiSet = &CONTROLS
    .add(b' ') // U+0020 SPACE
    .add(b'"') // U+0022 QUOTATION MARK
    .add(b'#') // U+0023 NUMBER SIGN
    .add(b'<') // U+003C LESS-THAN SIGN
    .add(b'>'); // U+003E GREATER-THAN SIGN

/// The [special-query percent-encode set].
///
/// Everything in [`QUERY`], plus U+0027 (`'`).
///
/// [special-query percent-encode set]: https://url.spec.whatwg.org/#special-query-percent-encode-set
pub const SPECIAL_QUERY: &AsciiSet = &QUERY.add(b'\''); // U+0027 APOSTROPHE

/// The [path percent-encode set].
///
/// Everything in [`QUERY`], plus U+003F (`?`), U+0060 GRAVE ACCENT,
/// U+007B (`{`), and U+007D (`}`).
///
/// [path percent-encode set]: https://url.spec.whatwg.org/#path-percent-encode-set
pub const PATH: &AsciiSet = &QUERY
    .add(b'?') // U+003F QUESTION MARK
    .add(b'`') // U+0060 GRAVE ACCENT
    .add(b'{') // U+007B LEFT CURLY BRACKET
    .add(b'}'); // U+007D RIGHT CURLY BRACKET

/// The [userinfo percent-encode set].
///
/// Everything in [`PATH`], plus U+002F (`/`), U+003A (`:`), U+003B (`;`),
/// U+003D (`=`), U+0040 (`@`), the inclusive range U+005B (`[`) to
/// U+005E (`^`), and U+007C (`|`).
///
/// [userinfo percent-encode set]: https://url.spec.whatwg.org/#userinfo-percent-encode-set
pub const USERINFO: &AsciiSet = &PATH
    .add(b'/') // U+002F SOLIDUS
    .add(b':') // U+003A COLON
    .add(b';') // U+003B SEMICOLON
    .add(b'=') // U+003D EQUALS SIGN
    .add(b'@') // U+0040 COMMERCIAL AT
    // U+005B to U+005E, inclusive, spelled out one byte at a time.
    .add(b'[') // U+005B LEFT SQUARE BRACKET
    .add(b'\\') // U+005C REVERSE SOLIDUS
    .add(b']') // U+005D RIGHT SQUARE BRACKET
    .add(b'^') // U+005E CIRCUMFLEX ACCENT
    .add(b'|'); // U+007C VERTICAL LINE

/// The [component percent-encode set].
///
/// Everything in [`USERINFO`], plus the inclusive range U+0024 (`$`) to
/// U+0026 (`&`), U+002B (`+`), and U+002C (`,`).
///
/// [component percent-encode set]: https://url.spec.whatwg.org/#component-percent-encode-set
pub const COMPONENT: &AsciiSet = &USERINFO
    // U+0024 to U+0026, inclusive, spelled out one byte at a time.
    .add(b'$') // U+0024 DOLLAR SIGN
    .add(b'%') // U+0025 PERCENT SIGN
    .add(b'&') // U+0026 AMPERSAND
    .add(b'+') // U+002B PLUS SIGN
    .add(b','); // U+002C COMMA

/// The [`application/x-www-form-urlencoded` percent-encode set].
///
/// Everything in [`COMPONENT`], plus U+0021 (`!`), the inclusive range
/// U+0027 (`'`) to U+0029 RIGHT PARENTHESIS, and U+007E (`~`).
///
/// [`application/x-www-form-urlencoded` percent-encode set]: https://url.spec.whatwg.org/#application-x-www-form-urlencoded-percent-encode-set
pub const FORM: &AsciiSet = &COMPONENT
    .add(b'!') // U+0021 EXCLAMATION MARK
    // U+0027 to U+0029, inclusive, spelled out one byte at a time.
    .add(b'\'') // U+0027 APOSTROPHE
    .add(b'(') // U+0028 LEFT PARENTHESIS
    .add(b')') // U+0029 RIGHT PARENTHESIS
    .add(b'~'); // U+007E TILDE

macro_rules! static_assert {
($( $bool: expr, )+) => {
fn _static_assert() {
Expand Down
2 changes: 1 addition & 1 deletion percent_encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ use core::{fmt, slice, str};

pub use self::ascii_set::{AsciiSet, CONTROLS, NON_ALPHANUMERIC};

mod ascii_set;
pub mod ascii_set;

/// Return the percent-encoding of the given byte.
///
Expand Down
3 changes: 2 additions & 1 deletion url/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ use crate::net::IpAddr;
target_os = "hermit"
))]
use crate::net::{SocketAddr, ToSocketAddrs};
use crate::parser::{to_u32, Context, Parser, SchemeType, USERINFO};
use crate::parser::{to_u32, Context, Parser, SchemeType};
use alloc::borrow::ToOwned;
use alloc::str;
use alloc::string::{String, ToString};
Expand All @@ -182,6 +182,7 @@ use core::convert::TryFrom;
use core::fmt::Write;
use core::ops::{Range, RangeFrom, RangeTo};
use core::{cmp, fmt, hash, mem};
use percent_encoding::ascii_set::USERINFO;
use percent_encoding::utf8_percent_encode;
#[cfg(feature = "std")]
#[cfg(any(
Expand Down
28 changes: 4 additions & 24 deletions url/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,17 @@ use core::str;
use crate::host::{Host, HostInternal};
use crate::Url;
use form_urlencoded::EncodingOverride;
use percent_encoding::{percent_encode, utf8_percent_encode, AsciiSet, CONTROLS};

/// Fragment percent-encode set: `CONTROLS` plus space, `"`, `<`, `>`, and the
/// grave accent.
///
/// <https://url.spec.whatwg.org/#fragment-percent-encode-set>
const FRAGMENT: &AsciiSet = &CONTROLS
    .add(b' ')
    .add(b'"')
    .add(b'<')
    .add(b'>')
    .add(b'`');

/// Path percent-encode set: `FRAGMENT` plus `#`, `?`, `{`, and `}`.
///
/// <https://url.spec.whatwg.org/#path-percent-encode-set>
const PATH: &AsciiSet = &FRAGMENT
    .add(b'#')
    .add(b'?')
    .add(b'{')
    .add(b'}');

/// Userinfo percent-encode set: `PATH` plus `/`, `:`, `;`, `=`, `@`, `[`, `\`,
/// `]`, `^`, and `|`.
///
/// <https://url.spec.whatwg.org/#userinfo-percent-encode-set>
pub(crate) const USERINFO: &AsciiSet = &PATH
    .add(b'/') // U+002F
    .add(b':') // U+003A
    .add(b';') // U+003B
    .add(b'=') // U+003D
    .add(b'@') // U+0040
    .add(b'[') // U+005B
    .add(b'\\') // U+005C
    .add(b']') // U+005D
    .add(b'^') // U+005E
    .add(b'|'); // U+007C
use percent_encoding::{
ascii_set::{FRAGMENT, PATH, QUERY, SPECIAL_QUERY, USERINFO},
percent_encode, utf8_percent_encode, AsciiSet, CONTROLS,
};

/// Percent-encode set for a single path segment: `PATH` plus `/` and `%`.
pub(crate) const PATH_SEGMENT: &AsciiSet = &PATH
    .add(b'/')
    .add(b'%');

/// Percent-encode set for a path segment of a special URL: `PATH_SEGMENT`
/// plus `\`.
///
/// Special URLs treat the backslash as a path separator, so it has to be
/// escaped in addition to everything `PATH_SEGMENT` already covers.
pub(crate) const SPECIAL_PATH_SEGMENT: &AsciiSet = &PATH_SEGMENT
    .add(b'\\');

// Query percent-encode set: `CONTROLS` plus space, `"`, `#`, `<`, and `>`.
// See https://url.spec.whatwg.org/#query-state
const QUERY: &AsciiSet = &CONTROLS
    .add(b' ')
    .add(b'"')
    .add(b'#')
    .add(b'<')
    .add(b'>');
// Special-query percent-encode set: `QUERY` plus the apostrophe (`'`).
const SPECIAL_QUERY: &AsciiSet = &QUERY
    .add(b'\'');

pub type ParseResult<T> = Result<T, ParseError>;

macro_rules! simple_enum_error {
Expand Down