Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache MX and A server lookups #256

Merged
merged 6 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 27 additions & 10 deletions lib/valid_email2/address.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
require "resolv"
require "mail"
require "unicode/emoji"
require "valid_email2/dns_records_cache"

module ValidEmail2
class Address
Expand All @@ -25,9 +26,7 @@ def initialize(address, dns_timeout = 5, dns_nameserver = nil)
@parse_error = false
@raw_address = address
@dns_timeout = dns_timeout

@resolv_config = Resolv::DNS::Config.default_config_hash
@resolv_config[:nameserver] = dns_nameserver if dns_nameserver
@dns_nameserver = dns_nameserver

begin
@address = Mail::Address.new(address)
Expand Down Expand Up @@ -137,10 +136,24 @@ def address_contain_emoticons?
@raw_address.scan(Unicode::Emoji::REGEX).length >= 1
end

def resolv_config
@resolv_config ||= begin
config = Resolv::DNS::Config.default_config_hash
config[:nameserver] = @dns_nameserver if @dns_nameserver
config
end

@resolv_config
end

def mx_servers
@mx_servers ||= Resolv::DNS.open(@resolv_config) do |dns|
dns.timeouts = @dns_timeout
dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX)
@mx_servers_cache ||= ValidEmail2::DnsRecordsCache.new

@mx_servers_cache.fetch(address.domain.downcase) do
Resolv::DNS.open(resolv_config) do |dns|
dns.timeouts = @dns_timeout
dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX)
end
end
end

Expand All @@ -149,10 +162,14 @@ def null_mx?
end

def mx_or_a_servers
@mx_or_a_servers ||= Resolv::DNS.open(@resolv_config) do |dns|
dns.timeouts = @dns_timeout
(mx_servers.any? && mx_servers) ||
dns.getresources(address.domain, Resolv::DNS::Resource::IN::A)
@mx_or_a_servers_cache ||= ValidEmail2::DnsRecordsCache.new

@mx_or_a_servers_cache.fetch(address.domain.downcase) do
Resolv::DNS.open(resolv_config) do |dns|
dns.timeouts = @dns_timeout
(mx_servers.any? && mx_servers) ||
dns.getresources(address.domain, Resolv::DNS::Resource::IN::A)
end
end
end
end
Expand Down
37 changes: 37 additions & 0 deletions lib/valid_email2/dns_records_cache.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
module ValidEmail2
class DnsRecordsCache
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

DnsRecordsCache

Definitely open to different/better naming for this!

MAX_CACHE_SIZE = 1_000

# Creates an empty cache; entries are added by #fetch.
def initialize
# Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } }
# - records:   the records returned by the lookup block given to #fetch
# - cached_at: Time.now at the moment the records were stored
# - ttl:       smallest ttl among the stored records; #fetch compares it with the
#              seconds elapsed since cached_at to decide whether the entry is fresh
@cache = {}
end

# Returns the records cached for +domain+, or runs the block to look them up.
#
# Before anything else the cache is pruned if it holds more than
# MAX_CACHE_SIZE entries. A cached entry is served only while the time
# elapsed since it was stored is below its ttl; otherwise the entry is
# dropped and the block is called. A non-empty block result is stored with
# the smallest ttl among its records; an empty result is returned uncached.
#
# @param domain [String] cache key (callers pass the downcased domain)
# @yieldreturn [Array] records responding to #ttl
# @return [Array] the cached or freshly fetched records
def fetch(domain, &block)
  prune_cache if @cache.size > MAX_CACHE_SIZE

  entry = @cache[domain]
  still_fresh = entry && (Time.now - entry[:cached_at]) < entry[:ttl]
  return entry[:records] if still_fresh

  # Missing or expired: make sure no stale entry survives a failed lookup.
  @cache.delete(domain)

  fetched = block.call
  return fetched unless fetched.any?

  shortest_ttl = fetched.map(&:ttl).min
  @cache[domain] = { records: fetched, cached_at: Time.now, ttl: shortest_ttl }
  fetched
end

# Evicts the oldest entries (by :cached_at) until the cache holds at most
# MAX_CACHE_SIZE entries. Does nothing when the cache is within the limit.
#
# The eviction list is built from the [domain, data] pairs themselves.
# Flattening those pairs before taking the first N would mix domains and
# data hashes in one list, so only about half of the excess entries would
# actually be removed.
def prune_cache
  excess = @cache.size - MAX_CACHE_SIZE
  return unless excess.positive?

  oldest = @cache.min_by(excess) { |_domain, data| data[:cached_at] }
  oldest.each { |domain, _data| @cache.delete(domain) }
end
end
end
279 changes: 279 additions & 0 deletions spec/address_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,283 @@
expect(address.valid?).to eq true
end
end

describe "caching" do
let(:email_address) { "[email protected]" }
let(:email_instance) { described_class.new(email_address) }
let(:dns_records_cache_instance) { ValidEmail2::DnsRecordsCache.new }
let(:ttl) { 1_000 }
let(:mock_resolv_dns) { instance_double(Resolv::DNS) }
let(:mock_mx_records) { [double("MX", exchange: "mx.ymail.com", preference: 10, ttl: ttl)] }

before do
allow(email_instance).to receive(:null_mx?).and_return(false)
allow(Resolv::DNS).to receive(:open).and_yield(mock_resolv_dns)
allow(mock_resolv_dns).to receive(:timeouts=)
end

describe "#valid_strict_mx?" do
let(:cached_at) { Time.now }
let(:mock_cache_data) { { email_instance.address.domain => { records: mock_mx_records, cached_at: cached_at, ttl: ttl } } }

before do
allow(mock_resolv_dns).to receive(:getresources)
.with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX)
.and_return(mock_mx_records)
end

it "calls the MX servers lookup when the email is not cached" do
result = email_instance.valid_strict_mx?

expect(Resolv::DNS).to have_received(:open).once
expect(result).to be true
end

it "does not call the MX servers lookup when the email is cached" do
email_instance.valid_strict_mx?
email_instance.valid_strict_mx?

expect(Resolv::DNS).to have_received(:open).once
end

it "returns the cached result for subsequent calls" do
first_result = email_instance.valid_strict_mx?
expect(first_result).to be true

allow(mock_resolv_dns).to receive(:getresources)
.with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX)
.and_return([])

second_result = email_instance.valid_strict_mx?
expect(second_result).to be true
end

describe "ttl" do
before do
dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
end

context "when the time since last lookup is less than the cached ttl entry" do
let(:cached_at) { Time.now }

it "does not call the MX servers lookup" do
email_instance.valid_strict_mx?

expect(Resolv::DNS).not_to have_received(:open)
end
end

context "when the time since last lookup is greater than the cached ttl entry" do
let(:cached_at) { Time.now - ttl }

it "calls the MX servers lookup" do
email_instance.valid_strict_mx?

expect(Resolv::DNS).to have_received(:open).once
end
end
end

describe "cache size" do
before do
dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
end

context "when the cache size is less than or equal to the max cache size" do
before do
stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1)
end

it "does not prune the cache" do
expect(dns_records_cache_instance).not_to receive(:prune_cache)

email_instance.valid_strict_mx?
end

it "does not call the MX servers lookup" do
email_instance.valid_strict_mx?

expect(Resolv::DNS).not_to have_received(:open)
end

context "and there are older cached entries" do
let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } }

it "does not prune those entries" do
email_instance.valid_strict_mx?

expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2
expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"])
end
end
end

context "when the cache size is greater than the max cache size" do
before do
stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0)
end

it "prunes the cache" do
expect(dns_records_cache_instance).to receive(:prune_cache).once

email_instance.valid_strict_mx?
end

# The pre-seeded cache is pruned first (MAX_CACHE_SIZE is stubbed to 0), so
# the entry for this domain is gone and a real lookup must happen.
it "calls the MX servers lookup" do
  email_instance.valid_strict_mx?

  expect(Resolv::DNS).to have_received(:open).once
end

context "and there are older cached entries" do
let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } }

it "prunes those entries" do
email_instance.valid_strict_mx?

expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1
expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain])
end
end
end
end
end

describe "#valid_mx?" do
let(:cached_at) { Time.now }
let(:mock_cache_data) { { email_instance.address.domain => { records: mock_a_records, cached_at: cached_at, ttl: ttl } } }
let(:mock_a_records) { [double("A", address: "192.168.1.1", ttl: ttl)] }

before do
allow(email_instance).to receive(:mx_servers).and_return(mock_mx_records)
allow(mock_resolv_dns).to receive(:getresources)
.with(email_instance.address.domain, Resolv::DNS::Resource::IN::A)
.and_return(mock_a_records)
end

it "calls the MX or A servers lookup when the email is not cached" do
result = email_instance.valid_mx?

expect(Resolv::DNS).to have_received(:open).once
expect(result).to be true
end

it "does not call the MX or A servers lookup when the email is cached" do
email_instance.valid_mx?
email_instance.valid_mx?

expect(Resolv::DNS).to have_received(:open).once
end

it "returns the cached result for subsequent calls" do
first_result = email_instance.valid_mx?
expect(first_result).to be true

allow(mock_resolv_dns).to receive(:getresources)
.with(email_instance.address.domain, Resolv::DNS::Resource::IN::A)
.and_return([])

second_result = email_instance.valid_mx?
expect(second_result).to be true
end

describe "ttl" do
before do
dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
end

context "when the time since last lookup is less than the cached ttl entry" do
let(:cached_at) { Time.now }

it "does not call the MX or A servers lookup" do
email_instance.valid_mx?

expect(Resolv::DNS).not_to have_received(:open)
end
end

context "when the time since last lookup is greater than the cached ttl entry" do
let(:cached_at) { Time.now - ttl }

# cached_at is a full ttl in the past here, so the entry is expired and the
# lookup must be performed again.
it "calls the MX or A servers lookup" do
  email_instance.valid_mx?

  expect(Resolv::DNS).to have_received(:open).once
end
end
end

describe "cache size" do
before do
dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data)
allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance)
allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original
end

context "when the cache size is less than or equal to the max cache size" do
before do
stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1)
end

# prune_cache is defined on the DnsRecordsCache instance, not on the Address,
# so the negative expectation must be set on the cache; set on the address it
# could never fail.
it "does not prune the cache" do
  expect(dns_records_cache_instance).not_to receive(:prune_cache)

  email_instance.valid_mx?
end

it "does not call the MX or A servers lookup" do
email_instance.valid_mx?

expect(Resolv::DNS).not_to have_received(:open)
end

context "and there are older cached entries" do
let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } }

it "does not prune those entries" do
email_instance.valid_mx?

expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2
expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"])
end
end
end

context "when the cache size is greater than the max cache size" do
before do
stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0)
end

it "prunes the cache" do
expect(dns_records_cache_instance).to receive(:prune_cache).once

email_instance.valid_mx?
end

it "calls the MX or A servers lookup" do
email_instance.valid_mx?

expect(Resolv::DNS).to have_received(:open).once
end

context "and there are older cached entries" do
let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } }

it "prunes those entries" do
email_instance.valid_mx?

expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1
expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain])
end
end
end
end
end
end
end
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
require 'rspec-benchmark'
RSpec.configure do |config|
config.include RSpec::Benchmark::Matchers
config.default_formatter = 'doc'
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I prefer this personally, but feel free to delete if you'd rather not have it.

end
RSpec::Benchmark.configure do |config|
config.disable_gc = true
Expand Down
Loading