From f7dfb02d48e317ddb4ee1cb9744a79ea4f28d7a3 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Mon, 14 Oct 2024 11:40:24 +0900 Subject: [PATCH 1/6] new: cache mx and a server lookups --- lib/valid_email2/address.rb | 19 +++++++- spec/address_spec.rb | 86 +++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index 734667e..3f377af 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -9,6 +9,9 @@ module ValidEmail2 class Address attr_accessor :address + @@mx_servers_cache = {} + @@mx_or_a_servers_cache = {} + PROHIBITED_DOMAIN_CHARACTERS_REGEX = /[+!_\/\s'`]/ DEFAULT_RECIPIENT_DELIMITER = '+' DOT_DELIMITER = '.' @@ -138,10 +141,16 @@ def address_contain_emoticons? end def mx_servers - @mx_servers ||= Resolv::DNS.open(@resolv_config) do |dns| + return @@mx_servers_cache[address.domain.downcase] if @@mx_servers_cache.key?(address.domain.downcase) + + result = Resolv::DNS.open(@resolv_config) do |dns| dns.timeouts = @dns_timeout dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) end + + @@mx_servers_cache[address.domain.downcase] = result + + result end def null_mx? @@ -149,11 +158,17 @@ def null_mx? end def mx_or_a_servers - @mx_or_a_servers ||= Resolv::DNS.open(@resolv_config) do |dns| + return @@mx_or_a_servers_cache[address.domain.downcase] if @@mx_or_a_servers_cache.key?(address.domain.downcase) + + result = Resolv::DNS.open(@resolv_config) do |dns| dns.timeouts = @dns_timeout (mx_servers.any? && mx_servers) || dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) end + + @@mx_or_a_servers_cache[address.domain.downcase] = result + + result end end end diff --git a/spec/address_spec.rb b/spec/address_spec.rb index 0f1165e..a6fdc5d 100644 --- a/spec/address_spec.rb +++ b/spec/address_spec.rb @@ -39,4 +39,90 @@ expect(address.valid?).to eq true end end + + describe "caching" do + let(:email_address) { "example@ymail.com" } + let(:email_instance) { described_class.new(email_address) } + let(:mock_resolv_dns) { instance_double(Resolv::DNS) } + let(:mock_mx_records) { [double('MX', exchange: 'mx.ymail.com', preference: 10)] } + + before do + allow(email_instance).to receive(:null_mx?).and_return(false) + allow(Resolv::DNS).to receive(:open).and_yield(mock_resolv_dns) + allow(mock_resolv_dns).to receive(:timeouts=) + end + + describe "#valid_strict_mx?" do + before do + described_class.class_variable_set(:@@mx_servers_cache, {}) + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX) + .and_return(mock_mx_records) + end + + it "calls the MX servers lookup when the email is not cached" do + result = email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true + end + + it "does not call the MX servers lookup when the email is cached" do + email_instance.valid_strict_mx? + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "returns the cached result for subsequent calls" do + first_result = email_instance.valid_strict_mx? + expect(first_result).to be true + + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX) + .and_return([]) + + second_result = email_instance.valid_strict_mx? + expect(second_result).to be true + end + end + + describe "#valid_mx?" do + let(:mock_a_records) { [double('A', address: '192.168.1.1')] } + + before do + described_class.class_variable_set(:@@mx_or_a_servers_cache, {}) + allow(email_instance).to receive(:mx_servers).and_return(mock_mx_records) + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A) + .and_return(mock_a_records) + end + + it "calls the MX or A servers lookup when the email is not cached" do + result = email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true + end + + it "does not call the MX or A servers lookup when the email is cached" do + email_instance.valid_mx? + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "returns the cached result for subsequent calls" do + first_result = email_instance.valid_mx? + expect(first_result).to be true + + allow(mock_resolv_dns).to receive(:getresources) + .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A) + .and_return([]) + + second_result = email_instance.valid_mx? + expect(second_result).to be true + end + end + end end From 3384bc2d01b729bd5d0a571964bfde90d1a9a666 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Wed, 16 Oct 2024 22:51:07 +0900 Subject: [PATCH 2/6] test: reset caches before each test --- spec/valid_email2_spec.rb | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/spec/valid_email2_spec.rb b/spec/valid_email2_spec.rb index 58b8543..05f8d48 100644 --- a/spec/valid_email2_spec.rb +++ b/spec/valid_email2_spec.rb @@ -79,6 +79,11 @@ class TestUserMultiple < TestModel let(:disposable_domain) { ValidEmail2.disposable_emails.first } + before do + ValidEmail2::Address.class_variable_set(:@@mx_servers_cache, {}) + ValidEmail2::Address.class_variable_set(:@@mx_or_a_servers_cache, {}) + end + describe "basic validation" do subject(:user) { TestUser.new(email: "") } From c80cee333a21ca52f3e1646884d3e231b2e8eaa2 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Sat, 26 Oct 2024 11:58:00 +0900 Subject: [PATCH 3/6] new: bust the cache if the time since last lookup is greater than the cached ttl --- lib/valid_email2/address.rb | 41 +++++++++++++++++++++++++------- spec/address_spec.rb | 47 ++++++++++++++++++++++++++++++++----- 2 files changed, 74 insertions(+), 14 deletions(-) diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index 3f377af..d83eae5 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -9,6 +9,7 @@ module ValidEmail2 class Address attr_accessor :address + # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } @@mx_servers_cache = {} @@mx_or_a_servers_cache = {} @@ -141,16 +142,28 @@ def address_contain_emoticons? end def mx_servers - return @@mx_servers_cache[address.domain.downcase] if @@mx_servers_cache.key?(address.domain.downcase) + domain = address.domain.downcase + + if @@mx_servers_cache[domain] + cache_entry = @@mx_servers_cache[domain] + if (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] + return cache_entry[:records] + else + @@mx_servers_cache.delete(domain) + end + end - result = Resolv::DNS.open(@resolv_config) do |dns| + records = Resolv::DNS.open(@resolv_config) do |dns| dns.timeouts = @dns_timeout dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) end - @@mx_servers_cache[address.domain.downcase] = result + if records.any? + ttl = records.map(&:ttl).min + @@mx_servers_cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } + end - result + records end def null_mx? @@ -158,17 +171,29 @@ def null_mx? end def mx_or_a_servers - return @@mx_or_a_servers_cache[address.domain.downcase] if @@mx_or_a_servers_cache.key?(address.domain.downcase) + domain = address.domain.downcase + + if @@mx_or_a_servers_cache[domain] + cache_entry = @@mx_or_a_servers_cache[domain] + if (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] + return cache_entry[:records] + else + @@mx_or_a_servers_cache.delete(domain) + end + end - result = Resolv::DNS.open(@resolv_config) do |dns| + records = Resolv::DNS.open(@resolv_config) do |dns| dns.timeouts = @dns_timeout (mx_servers.any? && mx_servers) || dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) end - @@mx_or_a_servers_cache[address.domain.downcase] = result + if records.any? + ttl = records.map(&:ttl).min + @@mx_or_a_servers_cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } + end - result + records end end end diff --git a/spec/address_spec.rb b/spec/address_spec.rb index a6fdc5d..c968ffe 100644 --- a/spec/address_spec.rb +++ b/spec/address_spec.rb @@ -43,8 +43,9 @@ describe "caching" do let(:email_address) { "example@ymail.com" } let(:email_instance) { described_class.new(email_address) } + let(:ttl) { 1_000 } let(:mock_resolv_dns) { instance_double(Resolv::DNS) } - let(:mock_mx_records) { [double('MX', exchange: 'mx.ymail.com', preference: 10)] } + let(:mock_mx_records) { [double('MX', exchange: 'mx.ymail.com', preference: 10, ttl: ttl)] } before do allow(email_instance).to receive(:null_mx?).and_return(false) @@ -85,10 +86,26 @@ second_result = email_instance.valid_strict_mx? expect(second_result).to be true end + + it "does not call the MX servers lookup when the cached time since last lookup is less than the cached ttl entry" do + described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_strict_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + it "calls the MX servers lookup when the cached time since last lookup is greater than the cached ttl entry" do + described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now - ttl, ttl: ttl }}) # Cached 1 day ago + + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end end describe "#valid_mx?" do - let(:mock_a_records) { [double('A', address: '192.168.1.1')] } + let(:mock_a_records) { [double('A', address: '192.168.1.1', ttl: ttl)] } before do described_class.class_variable_set(:@@mx_or_a_servers_cache, {}) @@ -98,11 +115,13 @@ .and_return(mock_a_records) end - it "calls the MX or A servers lookup when the email is not cached" do - result = email_instance.valid_mx? + context "when the email is not cached" do + it "calls the MX or A servers lookup" do + result = email_instance.valid_mx? - expect(Resolv::DNS).to have_received(:open).once - expect(result).to be true + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true + end end it "does not call the MX or A servers lookup when the email is cached" do @@ -123,6 +142,22 @@ second_result = email_instance.valid_mx? expect(second_result).to be true end + + it "does not call the MX or A servers lookup when the time since last lookup is less than the cached ttl entry" do + described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + it "calls the MX or A servers lookup when the time since last lookup is greater than the cached ttl entry" do + described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now - ttl, ttl: ttl }}) + + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end end end end From 7379ffbd22df760d6653d95c0729c08359f8998b Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Sat, 26 Oct 2024 13:19:42 +0900 Subject: [PATCH 4/6] new: prune the cache when too large --- lib/valid_email2/address.rb | 23 +++++-- spec/address_spec.rb | 120 +++++++++++++++++++++++++++++++++++- 2 files changed, 138 insertions(+), 5 deletions(-) diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index d83eae5..a4ae744 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -9,13 +9,14 @@ module ValidEmail2 class Address attr_accessor :address - # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } - @@mx_servers_cache = {} - @@mx_or_a_servers_cache = {} - PROHIBITED_DOMAIN_CHARACTERS_REGEX = /[+!_\/\s'`]/ DEFAULT_RECIPIENT_DELIMITER = '+' DOT_DELIMITER = '.' + MAX_CACHE_SIZE = 1_000 + + # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } + @@mx_servers_cache = {} + @@mx_or_a_servers_cache = {} def self.prohibited_domain_characters_regex @prohibited_domain_characters_regex ||= PROHIBITED_DOMAIN_CHARACTERS_REGEX @@ -142,6 +143,10 @@ def address_contain_emoticons? end def mx_servers + if @@mx_servers_cache.size > MAX_CACHE_SIZE + prune_cache(@@mx_servers_cache) + end + domain = address.domain.downcase if @@mx_servers_cache[domain] @@ -171,6 +176,10 @@ def null_mx? end def mx_or_a_servers + if @@mx_or_a_servers_cache.size > MAX_CACHE_SIZE + prune_cache(@@mx_or_a_servers_cache) + end + domain = address.domain.downcase if @@mx_or_a_servers_cache[domain] @@ -195,5 +204,11 @@ def mx_or_a_servers records end + + def prune_cache(cache) + entries_sorted_by_cached_at_asc = (cache.sort_by { |_domain, data| data[:cached_at] }).flatten + entries_to_remove = entries_sorted_by_cached_at_asc.first(cache.size - MAX_CACHE_SIZE) + entries_to_remove.each { |domain| cache.delete(domain) } + end end end diff --git a/spec/address_spec.rb b/spec/address_spec.rb index c968ffe..dd70236 100644 --- a/spec/address_spec.rb +++ b/spec/address_spec.rb @@ -102,6 +102,65 @@ expect(Resolv::DNS).to have_received(:open).once end + + it "does not prune the cache when the cache size is less than the max cache size" do + expect(email_instance).not_to receive(:prune_cache) + + email_instance.valid_strict_mx? + end + + it "prunes the cache when the cache size is greater than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + + expect(email_instance).to receive(:prune_cache).with(described_class.class_variable_get(:@@mx_servers_cache)).once + + email_instance.valid_strict_mx? + email_instance.valid_strict_mx? + end + + it "does not call the MX or A servers lookup when there is a cached entry for the domain and the cache size is less than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 1) + described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_strict_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + it "calls the MX or A servers lookup when there is a cached entry for the domain but the cache size is greater than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_strict_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "does not prune older entries when the cache size is less than the max size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 1) + described_class.class_variable_set(:@@mx_servers_cache, { + 'another_domain.com' => { + records: mock_mx_records, cached_at: Time.now - 100, ttl: ttl + } + }) + + email_instance.valid_strict_mx? + + expect(described_class.class_variable_get(:@@mx_servers_cache).keys).to match_array([email_instance.address.domain, 'another_domain.com']) + end + + it "prunes older entries when the cache size is greater than the max size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + described_class.class_variable_set(:@@mx_servers_cache, { + 'another_domain.com' => { + records: mock_mx_records, cached_at: Time.now - 100, ttl: ttl + } + }) + + email_instance.valid_strict_mx? + + expect(described_class.class_variable_get(:@@mx_servers_cache).keys).to match_array([email_instance.address.domain]) + end end describe "#valid_mx?" do @@ -153,11 +212,70 @@ it "calls the MX or A servers lookup when the time since last lookup is greater than the cached ttl entry" do described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now - ttl, ttl: ttl }}) - + email_instance.valid_mx? expect(Resolv::DNS).to have_received(:open).once end + + it "does not prune the cache when the cache size is less than the max cache size" do + expect(email_instance).not_to receive(:prune_cache) + + email_instance.valid_mx? + end + + it "prunes the cache when the cache size is greater than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + + expect(email_instance).to receive(:prune_cache).with(described_class.class_variable_get(:@@mx_or_a_servers_cache)).once + + email_instance.valid_mx? + email_instance.valid_mx? + end + + it "does not call the MX or A servers lookup when there is a cached entry for the domain and the cache size is less than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 1) + described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_mx? + + expect(Resolv::DNS).not_to have_received(:open) + end + + it "calls the MX or A servers lookup when there is a cached entry for the domain but the cache size is greater than the max cache size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + + it "does not prune older entries when the cache size is less than the max size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 1) + described_class.class_variable_set(:@@mx_or_a_servers_cache, { + 'another_domain.com' => { + records: mock_a_records, cached_at: Time.now - 100, ttl: ttl + } + }) + + email_instance.valid_mx? + + expect(described_class.class_variable_get(:@@mx_or_a_servers_cache).keys).to match_array([email_instance.address.domain, 'another_domain.com']) + end + + it "prunes older entries when the cache size is greater than the max size" do + stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + described_class.class_variable_set(:@@mx_or_a_servers_cache, { + 'another_domain.com' => { + records: mock_a_records, cached_at: Time.now - 100, ttl: ttl + } + }) + + email_instance.valid_mx? + + expect(described_class.class_variable_get(:@@mx_or_a_servers_cache).keys).to match_array([email_instance.address.domain]) + end end end end From a3575b95f48b617ebf1c9cd539a3d87607454ca6 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Sat, 2 Nov 2024 14:01:00 +0900 Subject: [PATCH 5/6] change: move caching logic to separate class --- lib/valid_email2/address.rb | 70 ++----- lib/valid_email2/dns_records_cache.rb | 37 ++++ spec/address_spec.rb | 264 +++++++++++++++----------- spec/spec_helper.rb | 1 + spec/valid_email2_spec.rb | 5 - 5 files changed, 202 insertions(+), 175 deletions(-) create mode 100644 lib/valid_email2/dns_records_cache.rb diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index a4ae744..e0aea65 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -4,6 +4,7 @@ require "resolv" require "mail" require "unicode/emoji" +require "valid_email2/dns_records_cache" module ValidEmail2 class Address @@ -12,11 +13,6 @@ class Address PROHIBITED_DOMAIN_CHARACTERS_REGEX = /[+!_\/\s'`]/ DEFAULT_RECIPIENT_DELIMITER = '+' DOT_DELIMITER = '.' - MAX_CACHE_SIZE = 1_000 - - # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } - @@mx_servers_cache = {} - @@mx_or_a_servers_cache = {} def self.prohibited_domain_characters_regex @prohibited_domain_characters_regex ||= PROHIBITED_DOMAIN_CHARACTERS_REGEX @@ -143,32 +139,14 @@ def address_contain_emoticons? end def mx_servers - if @@mx_servers_cache.size > MAX_CACHE_SIZE - prune_cache(@@mx_servers_cache) - end - - domain = address.domain.downcase + @mx_servers_cache ||= ValidEmail2::DnsRecordsCache.new - if @@mx_servers_cache[domain] - cache_entry = @@mx_servers_cache[domain] - if (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] - return cache_entry[:records] - else - @@mx_servers_cache.delete(domain) + @mx_servers_cache.fetch(address.domain.downcase) do + Resolv::DNS.open(@resolv_config) do |dns| + dns.timeouts = @dns_timeout + dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) end end - - records = Resolv::DNS.open(@resolv_config) do |dns| - dns.timeouts = @dns_timeout - dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) - end - - if records.any? - ttl = records.map(&:ttl).min - @@mx_servers_cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } - end - - records end def null_mx? @@ -176,39 +154,15 @@ def null_mx? end def mx_or_a_servers - if @@mx_or_a_servers_cache.size > MAX_CACHE_SIZE - prune_cache(@@mx_or_a_servers_cache) - end + @mx_or_a_servers_cache ||= ValidEmail2::DnsRecordsCache.new - domain = address.domain.downcase - - if @@mx_or_a_servers_cache[domain] - cache_entry = @@mx_or_a_servers_cache[domain] - if (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] - return cache_entry[:records] - else - @@mx_or_a_servers_cache.delete(domain) + @mx_or_a_servers_cache.fetch(address.domain.downcase) do + Resolv::DNS.open(@resolv_config) do |dns| + dns.timeouts = @dns_timeout + (mx_servers.any? && mx_servers) || + dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) end end - - records = Resolv::DNS.open(@resolv_config) do |dns| - dns.timeouts = @dns_timeout - (mx_servers.any? && mx_servers) || - dns.getresources(address.domain, Resolv::DNS::Resource::IN::A) - end - - if records.any? - ttl = records.map(&:ttl).min - @@mx_or_a_servers_cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } - end - - records - end - - def prune_cache(cache) - entries_sorted_by_cached_at_asc = (cache.sort_by { |_domain, data| data[:cached_at] }).flatten - entries_to_remove = entries_sorted_by_cached_at_asc.first(cache.size - MAX_CACHE_SIZE) - entries_to_remove.each { |domain| cache.delete(domain) } end end end diff --git a/lib/valid_email2/dns_records_cache.rb b/lib/valid_email2/dns_records_cache.rb new file mode 100644 index 0000000..72daa0d --- /dev/null +++ b/lib/valid_email2/dns_records_cache.rb @@ -0,0 +1,37 @@ +module ValidEmail2 + class DnsRecordsCache + MAX_CACHE_SIZE = 1_000 + + def initialize + # Cache structure: { domain (String): { records: [], cached_at: Time, ttl: Integer } } + @cache = {} + end + + def fetch(domain, &block) + prune_cache if @cache.size > MAX_CACHE_SIZE + + cache_entry = @cache[domain] + + if cache_entry && (Time.now - cache_entry[:cached_at]) < cache_entry[:ttl] + return cache_entry[:records] + else + @cache.delete(domain) + end + + records = block.call + + if records.any? + ttl = records.map(&:ttl).min + @cache[domain] = { records: records, cached_at: Time.now, ttl: ttl } + end + + records + end + + def prune_cache + entries_sorted_by_cached_at_asc = (@cache.sort_by { |_domain, data| data[:cached_at] }).flatten + entries_to_remove = entries_sorted_by_cached_at_asc.first(@cache.size - MAX_CACHE_SIZE) + entries_to_remove.each { |domain| @cache.delete(domain) } + end + end +end \ No newline at end of file diff --git a/spec/address_spec.rb b/spec/address_spec.rb index dd70236..0861857 100644 --- a/spec/address_spec.rb +++ b/spec/address_spec.rb @@ -43,9 +43,10 @@ describe "caching" do let(:email_address) { "example@ymail.com" } let(:email_instance) { described_class.new(email_address) } + let(:dns_records_cache_instance) { ValidEmail2::DnsRecordsCache.new } let(:ttl) { 1_000 } let(:mock_resolv_dns) { instance_double(Resolv::DNS) } - let(:mock_mx_records) { [double('MX', exchange: 'mx.ymail.com', preference: 10, ttl: ttl)] } + let(:mock_mx_records) { [double("MX", exchange: "mx.ymail.com", preference: 10, ttl: ttl)] } before do allow(email_instance).to receive(:null_mx?).and_return(false) @@ -54,8 +55,10 @@ end describe "#valid_strict_mx?" do + let(:cached_at) { Time.now } + let(:mock_cache_data) { { email_instance.address.domain => { records: mock_mx_records, cached_at: cached_at, ttl: ttl } } } + before do - described_class.class_variable_set(:@@mx_servers_cache, {}) allow(mock_resolv_dns).to receive(:getresources) .with(email_instance.address.domain, Resolv::DNS::Resource::IN::MX) .and_return(mock_mx_records) @@ -87,100 +90,118 @@ expect(second_result).to be true end - it "does not call the MX servers lookup when the cached time since last lookup is less than the cached ttl entry" do - described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + describe "ttl" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end - email_instance.valid_strict_mx? + context "when the time since last lookup is less than the cached ttl entry" do + let(:cached_at) { Time.now } - expect(Resolv::DNS).not_to have_received(:open) - end + it "does not call the MX servers lookup" do + email_instance.valid_strict_mx? - it "calls the MX servers lookup when the cached time since last lookup is greater than the cached ttl entry" do - described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now - ttl, ttl: ttl }}) # Cached 1 day ago - - email_instance.valid_strict_mx? + expect(Resolv::DNS).not_to have_received(:open) + end + end - expect(Resolv::DNS).to have_received(:open).once - end + context "when the time since last lookup is greater than the cached ttl entry" do + let(:cached_at) { Time.now - ttl } - it "does not prune the cache when the cache size is less than the max cache size" do - expect(email_instance).not_to receive(:prune_cache) + it "calls the MX servers lookup" do + email_instance.valid_strict_mx? - email_instance.valid_strict_mx? + expect(Resolv::DNS).to have_received(:open).once + end + end end - it "prunes the cache when the cache size is greater than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + describe "cache size" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end - expect(email_instance).to receive(:prune_cache).with(described_class.class_variable_get(:@@mx_servers_cache)).once + context "when the cache size is less than or equal to the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1) + end - email_instance.valid_strict_mx? - email_instance.valid_strict_mx? - end + it "does not prune the cache" do + expect(dns_records_cache_instance).not_to receive(:prune_cache) - it "does not call the MX or A servers lookup when there is a cached entry for the domain and the cache size is less than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 1) - described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + email_instance.valid_strict_mx? + end - email_instance.valid_strict_mx? + it "does not call the MX servers lookup" do + email_instance.valid_strict_mx? - expect(Resolv::DNS).not_to have_received(:open) - end + expect(Resolv::DNS).not_to have_received(:open) + end - it "calls the MX or A servers lookup when there is a cached entry for the domain but the cache size is greater than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) - described_class.class_variable_set(:@@mx_servers_cache, { email_instance.address.domain => { records: mock_mx_records, cached_at: Time.now, ttl: ttl }}) + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } } - email_instance.valid_strict_mx? + it "does not prune those entries" do + email_instance.valid_strict_mx? - expect(Resolv::DNS).to have_received(:open).once - end + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"]) + end + end + end - it "does not prune older entries when the cache size is less than the max size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 1) - described_class.class_variable_set(:@@mx_servers_cache, { - 'another_domain.com' => { - records: mock_mx_records, cached_at: Time.now - 100, ttl: ttl - } - }) + context "when the cache size is greater than the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0) + end - email_instance.valid_strict_mx? + it "prunes the cache" do + expect(dns_records_cache_instance).to receive(:prune_cache).once - expect(described_class.class_variable_get(:@@mx_servers_cache).keys).to match_array([email_instance.address.domain, 'another_domain.com']) - end + email_instance.valid_strict_mx? + end - it "prunes older entries when the cache size is greater than the max size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) - described_class.class_variable_set(:@@mx_servers_cache, { - 'another_domain.com' => { - records: mock_mx_records, cached_at: Time.now - 100, ttl: ttl - } - }) + it "calls the the MX servers lookup" do + email_instance.valid_strict_mx? - email_instance.valid_strict_mx? + expect(Resolv::DNS).to have_received(:open).once + end + + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_mx_records, cached_at: cached_at - 100, ttl: ttl } } } - expect(described_class.class_variable_get(:@@mx_servers_cache).keys).to match_array([email_instance.address.domain]) + it "prunes those entries" do + email_instance.valid_strict_mx? + + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain]) + end + end + end end end describe "#valid_mx?" do - let(:mock_a_records) { [double('A', address: '192.168.1.1', ttl: ttl)] } + let(:cached_at) { Time.now } + let(:mock_cache_data) { { email_instance.address.domain => { records: mock_a_records, cached_at: cached_at, ttl: ttl } } } + let(:mock_a_records) { [double("A", address: "192.168.1.1", ttl: ttl)] } before do - described_class.class_variable_set(:@@mx_or_a_servers_cache, {}) allow(email_instance).to receive(:mx_servers).and_return(mock_mx_records) allow(mock_resolv_dns).to receive(:getresources) .with(email_instance.address.domain, Resolv::DNS::Resource::IN::A) .and_return(mock_a_records) end - context "when the email is not cached" do - it "calls the MX or A servers lookup" do - result = email_instance.valid_mx? + it "calls the MX or A servers lookup when the email is not cached" do + result = email_instance.valid_mx? - expect(Resolv::DNS).to have_received(:open).once - expect(result).to be true - end + expect(Resolv::DNS).to have_received(:open).once + expect(result).to be true end it "does not call the MX or A servers lookup when the email is cached" do @@ -202,80 +223,99 @@ expect(second_result).to be true end - it "does not call the MX or A servers lookup when the time since last lookup is less than the cached ttl entry" do - described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + describe "ttl" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end - email_instance.valid_mx? + context "when the time since last lookup is less than the cached ttl entry" do + let(:cached_at) { Time.now } - expect(Resolv::DNS).not_to have_received(:open) - end + it "does not call the MX or A servers lookup" do + email_instance.valid_mx? - it "calls the MX or A servers lookup when the time since last lookup is greater than the cached ttl entry" do - described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now - ttl, ttl: ttl }}) + expect(Resolv::DNS).not_to have_received(:open) + end + end - email_instance.valid_mx? + context "when the time since last lookup is greater than the cached ttl entry" do + let(:cached_at) { Time.now - ttl } - expect(Resolv::DNS).to have_received(:open).once + it "calls the MX or A servers lookup " do + email_instance.valid_mx? + + expect(Resolv::DNS).to have_received(:open).once + end + end end - it "does not prune the cache when the cache size is less than the max cache size" do - expect(email_instance).not_to receive(:prune_cache) + describe "cache size" do + before do + dns_records_cache_instance.instance_variable_set(:@cache, mock_cache_data) + allow(ValidEmail2::DnsRecordsCache).to receive(:new).and_return(dns_records_cache_instance) + allow(dns_records_cache_instance).to receive(:fetch).with(email_instance.address.domain).and_call_original + end - email_instance.valid_mx? - end + context "when the cache size is less than or equal to the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 1) + end - it "prunes the cache when the cache size is greater than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) + it "does not prune the cache" do + expect(email_instance).not_to receive(:prune_cache) - expect(email_instance).to receive(:prune_cache).with(described_class.class_variable_get(:@@mx_or_a_servers_cache)).once + email_instance.valid_mx? + end - email_instance.valid_mx? - email_instance.valid_mx? - end + it "does not call the MX or A servers lookup" do + email_instance.valid_mx? - it "does not call the MX or A servers lookup when there is a cached entry for the domain and the cache size is less than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 1) - described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + expect(Resolv::DNS).not_to have_received(:open) + end - email_instance.valid_mx? + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } } - expect(Resolv::DNS).not_to have_received(:open) - end + it "does not prune those entries" do + email_instance.valid_mx? - it "calls the MX or A servers lookup when there is a cached entry for the domain but the cache size is greater than the max cache size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) - described_class.class_variable_set(:@@mx_or_a_servers_cache, { email_instance.address.domain => { records: mock_a_records, cached_at: Time.now, ttl: ttl }}) + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 2 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain, "another_domain.com"]) + end + end + end - email_instance.valid_mx? + context "when the cache size is greater than the max cache size" do + before do + stub_const("ValidEmail2::DnsRecordsCache::MAX_CACHE_SIZE", 0) + end - expect(Resolv::DNS).to have_received(:open).once - end + it "prunes the cache" do + expect(dns_records_cache_instance).to receive(:prune_cache).once - it "does not prune older entries when the cache size is less than the max size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 1) - described_class.class_variable_set(:@@mx_or_a_servers_cache, { - 'another_domain.com' => { - records: mock_a_records, cached_at: Time.now - 100, ttl: ttl - } - }) + email_instance.valid_mx? + end - email_instance.valid_mx? + it "calls the MX or A servers lookup" do + email_instance.valid_mx? - expect(described_class.class_variable_get(:@@mx_or_a_servers_cache).keys).to match_array([email_instance.address.domain, 'another_domain.com']) - end + expect(Resolv::DNS).to have_received(:open).once + end - it "prunes older entries when the cache size is greater than the max size" do - stub_const("#{described_class}::MAX_CACHE_SIZE", 0) - described_class.class_variable_set(:@@mx_or_a_servers_cache, { - 'another_domain.com' => { - records: mock_a_records, cached_at: Time.now - 100, ttl: ttl - } - }) + context "and there are older cached entries" do + let(:mock_cache_data) { { "another_domain.com" => { records: mock_a_records, cached_at: cached_at - 100, ttl: ttl } } } - email_instance.valid_mx? + it "prunes those entries" do + email_instance.valid_mx? - expect(described_class.class_variable_get(:@@mx_or_a_servers_cache).keys).to match_array([email_instance.address.domain]) - end + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys.size).to eq 1 + expect(dns_records_cache_instance.instance_variable_get(:@cache).keys).to match_array([email_instance.address.domain]) + end + end + end + end end end end diff --git a/spec/spec_helper.rb b/spec/spec_helper.rb index 3a2a1ec..d5e02cc 100644 --- a/spec/spec_helper.rb +++ b/spec/spec_helper.rb @@ -5,6 +5,7 @@ require 'rspec-benchmark' RSpec.configure do |config| config.include RSpec::Benchmark::Matchers + config.default_formatter = 'doc' end RSpec::Benchmark.configure do |config| config.disable_gc = true diff --git a/spec/valid_email2_spec.rb b/spec/valid_email2_spec.rb index 05f8d48..58b8543 100644 --- a/spec/valid_email2_spec.rb +++ b/spec/valid_email2_spec.rb @@ -79,11 +79,6 @@ class TestUserMultiple < TestModel let(:disposable_domain) { ValidEmail2.disposable_emails.first } - before do - ValidEmail2::Address.class_variable_set(:@@mx_servers_cache, {}) - ValidEmail2::Address.class_variable_set(:@@mx_or_a_servers_cache, {}) - end - describe "basic validation" do subject(:user) { TestUser.new(email: "") } From eb794f223d099376bac12a77ec9a5459f1989a59 Mon Sep 17 00:00:00 2001 From: Ian Bayne Date: Sun, 3 Nov 2024 08:37:00 +0900 Subject: [PATCH 6/6] change: set up resolv config outside initialize method See https://github.com/micke/valid_email2/pull/256#issuecomment-2453059404 --- lib/valid_email2/address.rb | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/lib/valid_email2/address.rb b/lib/valid_email2/address.rb index e0aea65..ed00ed3 100644 --- a/lib/valid_email2/address.rb +++ b/lib/valid_email2/address.rb @@ -26,9 +26,7 @@ def initialize(address, dns_timeout = 5, dns_nameserver = nil) @parse_error = false @raw_address = address @dns_timeout = dns_timeout - - @resolv_config = Resolv::DNS::Config.default_config_hash - @resolv_config[:nameserver] = dns_nameserver if dns_nameserver + @dns_nameserver = dns_nameserver begin @address = Mail::Address.new(address) @@ -138,11 +136,21 @@ def address_contain_emoticons? @raw_address.scan(Unicode::Emoji::REGEX).length >= 1 end + def resolv_config + @resolv_config ||= begin + config = Resolv::DNS::Config.default_config_hash + config[:nameserver] = @dns_nameserver if @dns_nameserver + config + end + + @resolv_config + end + def mx_servers @mx_servers_cache ||= ValidEmail2::DnsRecordsCache.new @mx_servers_cache.fetch(address.domain.downcase) do - Resolv::DNS.open(@resolv_config) do |dns| + Resolv::DNS.open(resolv_config) do |dns| dns.timeouts = @dns_timeout dns.getresources(address.domain, Resolv::DNS::Resource::IN::MX) end @@ -157,7 +165,7 @@ def mx_or_a_servers @mx_or_a_servers_cache ||= ValidEmail2::DnsRecordsCache.new @mx_or_a_servers_cache.fetch(address.domain.downcase) do - Resolv::DNS.open(@resolv_config) do |dns| + Resolv::DNS.open(resolv_config) do |dns| dns.timeouts = @dns_timeout (mx_servers.any? && mx_servers) || dns.getresources(address.domain, Resolv::DNS::Resource::IN::A)