diff --git a/README.md b/README.md index eba84db..fb64bba 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,21 @@ Validate SMTP (RFC-5321) Mailbox addresses. ## Some notes -Length limitations are not checked. +**Domains are now checked beyond RFC-5321 syntax only** -Technically a local-part of a Mailbox address is limited to 64 octets -or less, see: +Domain names must be fully qualified; that is with at least two labels. The top-level domain must have at least two octets. -Also, domain names are limited to 255 octets, see: - +**Length limitations are now checked** -And individual labels within a domain name are limited to 63 -octets or less, see: - section 2.3.4. Size limits +Total length limit of an address is 986 octets; based on a 1,000 octet SMTP line length. + +See section 2.3.4. Size limits: + +Domain names are limited to 255 octets, when encoded with a length byte before each label, and including the top-level zero length label. So, the effective limit with interstitial dots is 253 octets. + +Labels within a domain name are limited to 63 octets. + +The above are limits of the DNS protocol, not just a particular implementation. However, RFC-5321 section 4.5.3.1. 
“Size Limits and Minimums” says: diff --git a/main.cpp b/main.cpp index 8cbc5c0..da6c8b0 100644 --- a/main.cpp +++ b/main.cpp @@ -8,7 +8,6 @@ int main(int argc, char* argv[]) "simple@example.com", "very.common@example.com", "disposable.style.email.with+symbol@example.com", - "admin@mailserver1", "\" \"@example.org", "\"john..doe\"@example.org", "\"\"@example.org", @@ -30,6 +29,7 @@ int main(int argc, char* argv[]) } const char* bad_addresses[] = { + "admin@mailserver1", // used to be okay, now domain not fully qualified "\"john\\\\\"doe\"@example.org", "user@[300.0.0.1]", "user@[127.0.0.0.1]", @@ -43,6 +43,9 @@ int main(int argc, char* argv[]) "john..doe@example.com", "foo bar@example.com", "foo.bar@bad=domain.com", + "domain-too-long@XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.xyz", + "tld-too-short@foo.x", + "local-part@label_too_long_XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX.xyz" }; for (auto bad: bad_addresses) { diff --git a/smtp-address-validator.hpp b/smtp-address-validator.hpp index 5980e7d..f92a636 100644 --- a/smtp-address-validator.hpp +++ b/smtp-address-validator.hpp @@ -3,6 +3,6 @@ #include -bool is_address(std::string_view s); +bool is_address(std::string_view s) noexcept; #endif // SMTP_ADDRESS_PARTSER_HPP_INCLUDED diff --git a/smtp-address-validator.rl b/smtp-address-validator.rl index 1b55f95..fe72548 100644 --- a/smtp-address-validator.rl +++ b/smtp-address-validator.rl @@ -1,4 +1,6 @@ #include "smtp-address-validator.hpp" + +using namespace std::literals; %%{ machine address; @@ -111,8 +113,14 @@ main := Mailbox @{ result = true; } $err{ result = false; }; %% write data; -bool is_address(std::string_view s) +bool is_address(std::string_view s) noexcept { + // An insane length, to protect the parsing code from huge 
input. SMTP line limit, minus command size. + constexpr auto insane_length = 1000 - "MAIL FROM:<>\r\n"sv.length(); + + if (s.length() > insane_length) + return false; + int cs = 0; const char* p = s.begin(); @@ -124,5 +132,40 @@ bool is_address(std::string_view s) %% write init; %% write exec; + if (!result) // Failure to parse. + return false; + + const auto at_idx = s.find_last_of("@"sv); + const auto domain = s.substr(at_idx + 1); + + if (domain[0] == '[') // An address literal. + return true; + + // Further domain checks. + if (domain.length() > 253) + return false; + + bool tld = false; + + auto e = domain.length(); + for (std::string_view::size_type dot; + (dot = domain.substr(0, e).find_last_of("."sv)) != + std::string_view::npos; + e = dot) { + const auto label = domain.substr(dot + 1, e - dot - 1); // substr takes (pos, count): pass the label's length, not its end index + if (label.length() > 63) + return false; + if (!tld) { + if (label.length() < 2) + return false; // TLD too short + tld = true; + } + } + if (domain.substr(0, e).length() > 63) + return false; + + if (!tld) + return false; // domain not fully qualified + return result; }