-
Notifications
You must be signed in to change notification settings - Fork 335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(percent-encoding): add RFC2396 ascii sets #971
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -66,7 +66,7 @@ use core::{fmt, mem, ops, slice, str}; | |||||
/// /// https://url.spec.whatwg.org/#fragment-percent-encode-set | ||||||
/// const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`'); | ||||||
/// ``` | ||||||
#[derive(Debug, PartialEq, Eq)] | ||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)] | ||||||
pub struct AsciiSet { | ||||||
mask: [Chunk; ASCII_RANGE_LEN / BITS_PER_CHUNK], | ||||||
} | ||||||
|
@@ -79,7 +79,7 @@ const BITS_PER_CHUNK: usize = 8 * mem::size_of::<Chunk>(); | |||||
|
||||||
impl AsciiSet { | ||||||
/// An empty set. | ||||||
pub const EMPTY: AsciiSet = AsciiSet { | ||||||
pub const EMPTY: &'static AsciiSet = &AsciiSet { | ||||||
mask: [0; ASCII_RANGE_LEN / BITS_PER_CHUNK], | ||||||
}; | ||||||
|
||||||
|
@@ -101,14 +101,26 @@ impl AsciiSet { | |||||
AsciiSet { mask } | ||||||
} | ||||||
|
||||||
pub const fn add_range(&self, start: u8, end: u8) -> Self { | ||||||
let mut new = AsciiSet { mask: self.mask }; | ||||||
|
||||||
let mut i = start; | ||||||
while i <= end { | ||||||
new = new.add(i); | ||||||
i += 1; | ||||||
} | ||||||
|
||||||
new | ||||||
} | ||||||
|
||||||
pub const fn remove(&self, byte: u8) -> Self { | ||||||
let mut mask = self.mask; | ||||||
mask[byte as usize / BITS_PER_CHUNK] &= !(1 << (byte as usize % BITS_PER_CHUNK)); | ||||||
AsciiSet { mask } | ||||||
} | ||||||
|
||||||
/// Return the union of two sets. | ||||||
pub const fn union(&self, other: Self) -> Self { | ||||||
pub const fn union(&self, other: &Self) -> Self { | ||||||
let mask = [ | ||||||
self.mask[0] | other.mask[0], | ||||||
self.mask[1] | other.mask[1], | ||||||
|
@@ -128,15 +140,31 @@ impl AsciiSet { | |||||
impl ops::Add for AsciiSet { | ||||||
type Output = Self; | ||||||
|
||||||
fn add(self, other: Self) -> Self { | ||||||
fn add(self, other: Self) -> Self::Output { | ||||||
self.union(&other) | ||||||
} | ||||||
} | ||||||
|
||||||
impl ops::Add for &AsciiSet { | ||||||
type Output = AsciiSet; | ||||||
|
||||||
fn add(self, other: Self) -> Self::Output { | ||||||
self.union(other) | ||||||
} | ||||||
} | ||||||
|
||||||
impl ops::Not for AsciiSet { | ||||||
type Output = Self; | ||||||
|
||||||
fn not(self) -> Self { | ||||||
fn not(self) -> Self::Output { | ||||||
self.complement() | ||||||
} | ||||||
} | ||||||
|
||||||
impl ops::Not for &AsciiSet { | ||||||
type Output = AsciiSet; | ||||||
|
||||||
fn not(self) -> Self::Output { | ||||||
self.complement() | ||||||
} | ||||||
} | ||||||
|
@@ -176,40 +204,100 @@ static_assert! { | |||||
/// Everything that is not an ASCII letter or digit. | ||||||
/// | ||||||
/// This is probably more eager than necessary in any context. | ||||||
pub const NON_ALPHANUMERIC: &AsciiSet = &CONTROLS | ||||||
.add(b' ') | ||||||
.add(b'!') | ||||||
.add(b'"') | ||||||
.add(b'#') | ||||||
.add(b'$') | ||||||
.add(b'%') | ||||||
.add(b'&') | ||||||
.add(b'\'') | ||||||
.add(b'(') | ||||||
.add(b')') | ||||||
.add(b'*') | ||||||
.add(b'+') | ||||||
.add(b',') | ||||||
.add(b'-') | ||||||
.add(b'.') | ||||||
.add(b'/') | ||||||
.add(b':') | ||||||
pub const NON_ALPHANUMERIC: &AsciiSet = &ALPHA_NUM.complement(); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. non-blocking nit:
Suggested change
A good description like this helps users who are searching for these definitions (e.g. with a search engine), and a direct link to the RFC section where they are defined is useful when you need to dig into the details (admittedly it's less useful for this particular case, but I think it's a good general practice). |
||||||
/// | ||||||
/// ```txt | ||||||
/// lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | | ||||||
/// "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | | ||||||
/// "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | ||||||
/// ``` | ||||||
pub const LOW_ALPHA: &AsciiSet = &AsciiSet::EMPTY.add_range(b'a', b'z'); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | | ||||||
/// "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | | ||||||
/// "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | ||||||
/// ``` | ||||||
pub const UP_ALPHA: &AsciiSet = &AsciiSet::EMPTY.add_range(b'A', b'Z'); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// alpha = lowalpha | upalpha | ||||||
/// ``` | ||||||
pub const ALPHA: &AsciiSet = &LOW_ALPHA.union(UP_ALPHA); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | | ||||||
/// "8" | "9" | ||||||
/// ``` | ||||||
pub const DIGIT: &AsciiSet = &AsciiSet::EMPTY.add_range(b'0', b'9'); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// alphanum = alpha | digit | ||||||
/// ``` | ||||||
pub const ALPHA_NUM: &AsciiSet = &ALPHA.union(DIGIT); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | | ||||||
/// "$" | "," | ||||||
/// ``` | ||||||
/// | ||||||
/// Matches JavaScript's `decodeURI`. | ||||||
pub const RESERVED: &AsciiSet = &AsciiSet::EMPTY | ||||||
.add(b';') | ||||||
.add(b'<') | ||||||
.add(b'=') | ||||||
.add(b'>') | ||||||
.add(b'/') | ||||||
.add(b'?') | ||||||
.add(b':') | ||||||
.add(b'@') | ||||||
.add(b'&') | ||||||
.add(b'=') | ||||||
.add(b'+') | ||||||
.add(b'$') | ||||||
.add(b',') | ||||||
.add(b'[') | ||||||
.add(b'\\') | ||||||
.add(b']') | ||||||
.add(b'^') | ||||||
.add(b']'); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" | ||||||
/// ``` | ||||||
pub const MARK: &AsciiSet = &AsciiSet::EMPTY | ||||||
.add(b'-') | ||||||
.add(b'_') | ||||||
.add(b'`') | ||||||
.add(b'{') | ||||||
.add(b'|') | ||||||
.add(b'}') | ||||||
.add(b'~'); | ||||||
.add(b'.') | ||||||
.add(b'!') | ||||||
.add(b'~') | ||||||
.add(b'*') | ||||||
.add(b'\'') | ||||||
.add(b'(') | ||||||
.add(b')'); | ||||||
|
||||||
/// [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// ```txt | ||||||
/// unreserved = alphanum | mark | ||||||
/// ``` | ||||||
/// | ||||||
/// Matches JavaScript's `encodeURIComponent` / `decodeURIComponent`. | ||||||
pub const UNRESERVED: &AsciiSet = &ALPHA_NUM.union(MARK); | ||||||
|
||||||
/// All reserved and unreserved characters, | ||||||
/// according to [RFC2396](https://datatracker.ietf.org/doc/html/rfc2396). | ||||||
/// | ||||||
/// Matches JavaScript's `encodeURI`. | ||||||
pub const UNRESERVED_RESERVED: &AsciiSet = &UNRESERVED.union(RESERVED); | ||||||
|
||||||
/// Return the percent-encoding of the given byte. | ||||||
/// | ||||||
|
@@ -542,8 +630,8 @@ mod tests { | |||||
/// useful for defining sets in a modular way. | ||||||
#[test] | ||||||
fn union() { | ||||||
const A: AsciiSet = AsciiSet::EMPTY.add(b'A'); | ||||||
const B: AsciiSet = AsciiSet::EMPTY.add(b'B'); | ||||||
const A: &AsciiSet = &AsciiSet::EMPTY.add(b'A'); | ||||||
const B: &AsciiSet = &AsciiSet::EMPTY.add(b'B'); | ||||||
const UNION: AsciiSet = A.union(B); | ||||||
const EXPECTED: AsciiSet = AsciiSet::EMPTY.add(b'A').add(b'B'); | ||||||
assert_eq!(UNION, EXPECTED); | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this took a range (e.g. `b'a'..=b'z'`) instead of two separate arguments, it would be obvious whether the range is half-open or inclusive (it's actually inclusive, but that's not obvious from the method name at all).