Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

deps: update ada to 2.1.0 #47598

Merged
merged 1 commit into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 79 additions & 14 deletions deps/ada/ada.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
/* begin file src/ada.cpp */
#include "ada.h"
/* begin file src/checkers.cpp */
Expand Down Expand Up @@ -2753,7 +2753,7 @@ bool ascii_has_upper_case(char* input, size_t length) {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
size_t i = 0;

uint64_t runner{0};
Expand All @@ -2775,7 +2775,7 @@ void ascii_map(char* input, size_t length) {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
size_t i = 0;

for (; i + 7 < length; i += 8) {
Expand Down Expand Up @@ -9845,7 +9845,7 @@ constexpr bool to_lower_ascii(char* input, size_t length) noexcept {
auto broadcast = [](uint8_t v) -> uint64_t { return 0x101010101010101 * v; };
uint64_t broadcast_80 = broadcast(0x80);
uint64_t broadcast_Ap = broadcast(128 - 'A');
uint64_t broadcast_Zp = broadcast(128 - 'Z');
uint64_t broadcast_Zp = broadcast(128 - 'Z' - 1);
uint64_t non_ascii = 0;
size_t i = 0;

Expand Down Expand Up @@ -9961,7 +9961,7 @@ ada_really_inline constexpr bool is_forbidden_domain_code_point(
}

ada_really_inline constexpr bool contains_forbidden_domain_code_point(
char* input, size_t length) noexcept {
const char* input, size_t length) noexcept {
size_t i = 0;
uint8_t accumulator{};
for (; i + 4 <= length; i += 4) {
Expand All @@ -9976,6 +9976,44 @@ ada_really_inline constexpr bool contains_forbidden_domain_code_point(
return accumulator;
}

// Lookup table indexed by byte value (one entry per possible 8-bit value).
// Each entry classifies the byte:
//   0 - neither flagged as forbidden nor an upper-case ASCII letter,
//   1 - flagged as forbidden: bytes 0x00-0x20, '#', '%', '/', ':', '<', '>',
//       '?', '@', '[', '\', ']', '^', '|', 0x7F and every byte >= 0x80,
//   2 - upper-case ASCII letter 'A'..'Z'.
// The two non-zero values occupy distinct bits, so OR-ing entries together
// records which of the two categories has been seen.
constexpr static uint8_t is_forbidden_domain_code_point_table_or_upper[] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

// Compile-time sanity checks: the table has exactly one entry per byte value,
// and both ends of the upper-case range ('A' and 'Z') carry the value 2.
static_assert(sizeof(is_forbidden_domain_code_point_table_or_upper) == 256);
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('A')] == 2);
static_assert(is_forbidden_domain_code_point_table_or_upper[uint8_t('Z')] == 2);

// Scans the `length` bytes starting at `input` and ORs together the
// is_forbidden_domain_code_point_table_or_upper entry of every byte.
// Returns the OR-ed value converted to bool, i.e. true when at least one
// scanned byte has a non-zero table entry and false otherwise (including for
// an empty input).
//
// NOTE(review): the table holds the distinct values 1 and 2, but the bool
// return type collapses the result to 0/1, so a caller cannot tell the two
// apart (a comparison of the result against 2 is never true). Confirm whether
// a uint8_t return type was intended; changing it requires updating the
// matching declaration at the same time.
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
    const char* input, size_t length) noexcept {
  uint8_t seen_flags{};
  size_t pos = 0;

  // Main loop: consume the input four bytes per iteration.
  while (pos + 4 <= length) {
    const uint8_t first =
        is_forbidden_domain_code_point_table_or_upper[uint8_t(input[pos])];
    const uint8_t second =
        is_forbidden_domain_code_point_table_or_upper[uint8_t(input[pos + 1])];
    const uint8_t third =
        is_forbidden_domain_code_point_table_or_upper[uint8_t(input[pos + 2])];
    const uint8_t fourth =
        is_forbidden_domain_code_point_table_or_upper[uint8_t(input[pos + 3])];
    seen_flags |= first;
    seen_flags |= second;
    seen_flags |= third;
    seen_flags |= fourth;
    pos += 4;
  }

  // Tail: handle the zero to three bytes left over.
  while (pos < length) {
    seen_flags |=
        is_forbidden_domain_code_point_table_or_upper[uint8_t(input[pos])];
    ++pos;
  }

  return seen_flags;
}

static_assert(unicode::is_forbidden_domain_code_point('%'));
static_assert(unicode::is_forbidden_domain_code_point('\x7f'));
static_assert(unicode::is_forbidden_domain_code_point('\0'));
Expand Down Expand Up @@ -13473,23 +13511,50 @@ ada_really_inline bool url_aggregator::parse_host(std::string_view input) {
// to ASCII with domain and false. The most common case is an ASCII input, in
// which case we do not need to call the expensive 'to_ascii' if a few
// conditions are met: no '%' and no 'xn-' subsequence.
std::string _buffer = std::string(input);
// This next function checks that the result is ascii, but we are going
// to check anyhow with is_forbidden.
// bool is_ascii =
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
bool is_forbidden = unicode::contains_forbidden_domain_code_point(
_buffer.data(), _buffer.size());
if (is_forbidden == 0 && _buffer.find("xn-") == std::string_view::npos) {

// Often, the input does not contain any forbidden code points, and no upper
// case ASCII letter, then we can just copy it to the buffer. We want to
// optimize for such a common case.
uint8_t is_forbidden_or_upper =
unicode::contains_forbidden_domain_code_point_or_upper(input.data(),
input.size());
// Minor optimization opportunity:
// contains_forbidden_domain_code_point_or_upper could be extended to check for
// the presence of characters that cannot appear in the ipv4 address and we
// could also check whether x and n and - are present, and so we could skip
// some of the checks below. However, the gains are likely to be small, and
// the code would be more complex.
if (is_forbidden_or_upper == 0 &&
input.find("xn-") == std::string_view::npos) {
// fast path
update_base_hostname(_buffer);
update_base_hostname(input);
if (checkers::is_ipv4(get_hostname())) {
ada_log("parse_host fast path ipv4");
return parse_ipv4(get_hostname());
}
ada_log("parse_host fast path ", get_hostname());
return true;
} else if (is_forbidden_or_upper == 2) {
// We have encountered at least one upper case ASCII letter, let us
// try to convert it to lower case. If there is no 'xn-' in the result,
// we can then use a secondary fast path.
std::string _buffer = std::string(input);
unicode::to_lower_ascii(_buffer.data(), _buffer.size());
if (input.find("xn-") == std::string_view::npos) {
// secondary fast path when input is not all lower case
update_base_hostname(input);
if (checkers::is_ipv4(get_hostname())) {
ada_log("parse_host fast path ipv4");
return parse_ipv4(get_hostname());
}
ada_log("parse_host fast path ", get_hostname());
return true;
}
}
// We have encountered at least one forbidden code point or the input contains
// 'xn-' (case insensitive), so we need to call 'to_ascii' to perform the full
// conversion.

ada_log("parse_host calling to_ascii");
std::optional<std::string> host = std::string(get_hostname());
is_valid = ada::unicode::to_ascii(host, input, input.find('%'));
Expand Down
19 changes: 14 additions & 5 deletions deps/ada/ada.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* auto-generated on 2023-03-30 17:00:48 -0400. Do not edit! */
/* auto-generated on 2023-04-17 12:20:41 -0400. Do not edit! */
/* begin file include/ada.h */
/**
* @file ada.h
Expand Down Expand Up @@ -1418,11 +1418,20 @@ ada_really_inline constexpr bool is_forbidden_host_code_point(
const char c) noexcept;

/**
* Checks if the input is a forbidden domain code point.
* Checks if the input contains a forbidden domain code point.
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
*/
ada_really_inline constexpr bool contains_forbidden_domain_code_point(
char* input, size_t length) noexcept;
const char* input, size_t length) noexcept;

/**
* Scans `length` bytes starting at `input` for forbidden domain code points
* and upper case ASCII letters. The intended result is a bit set: the first
* bit is set to 1 if the input contains a forbidden domain code point, and
* the second bit is set to 1 if the input contains an upper case ASCII letter.
*
* NOTE(review): the declared return type is bool, so the two bits described
* above are collapsed into a single true/false value on return and callers
* cannot distinguish the two conditions. Confirm whether uint8_t was intended.
*
* @param input pointer to the first byte to examine
* @param length number of bytes to examine
* @see https://url.spec.whatwg.org/#forbidden-domain-code-point
*/
ada_really_inline constexpr bool contains_forbidden_domain_code_point_or_upper(
const char* input, size_t length) noexcept;

/**
* Checks if the input is a forbidden domain code point.
Expand Down Expand Up @@ -6503,13 +6512,13 @@ inline std::ostream &operator<<(std::ostream &out,
#ifndef ADA_ADA_VERSION_H
#define ADA_ADA_VERSION_H

#define ADA_VERSION "2.0.0"
#define ADA_VERSION "2.1.0"

namespace ada {

enum {
ADA_VERSION_MAJOR = 2,
ADA_VERSION_MINOR = 0,
ADA_VERSION_MINOR = 1,
ADA_VERSION_REVISION = 0,
};

Expand Down