From 39e1d867c660e361aa2f0f3f11aa398636324956 Mon Sep 17 00:00:00 2001 From: Manabu Niseki Date: Sat, 20 Jan 2024 19:12:11 +0900 Subject: [PATCH 1/3] refactor: renew parallel execution --- lib/mihari/actor.rb | 7 +++++++ lib/mihari/analyzers/base.rb | 7 ------- lib/mihari/enrichers/whois.rb | 2 +- lib/mihari/rule.rb | 30 ++++++++++++++++++++++++------ lib/mihari/schemas/options.rb | 7 ++----- spec/commands/search_spec.rb | 1 - spec/rule_spec.rb | 4 ---- 7 files changed, 34 insertions(+), 24 deletions(-) diff --git a/lib/mihari/actor.rb b/lib/mihari/actor.rb index 833bafb90..9f297207e 100644 --- a/lib/mihari/actor.rb +++ b/lib/mihari/actor.rb @@ -50,6 +50,13 @@ def timeout options[:timeout] end + # + # @return [Boolean] + # + def parallel? + options[:parallel] || Mihari.config.parallel + end + def validate_configuration! return if configured? diff --git a/lib/mihari/analyzers/base.rb b/lib/mihari/analyzers/base.rb index 4d41a218f..196de18ed 100644 --- a/lib/mihari/analyzers/base.rb +++ b/lib/mihari/analyzers/base.rb @@ -40,13 +40,6 @@ def ignore_error? options[:ignore_error] || Mihari.config.ignore_error end - # - # @return [Boolean] - # - def parallel? - options[:parallel] || Mihari.config.parallel - end - # @return [Array, Array] def artifacts raise NotImplementedError, "You must implement #{self.class}##{__method__}" diff --git a/lib/mihari/enrichers/whois.rb b/lib/mihari/enrichers/whois.rb index 63cff39ac..a1ee7236a 100644 --- a/lib/mihari/enrichers/whois.rb +++ b/lib/mihari/enrichers/whois.rb @@ -18,7 +18,7 @@ class Whois < Base # @return [Mihari::Models::WhoisRecord, nil] # def call(artifact) - artifact.whois_record ||= memoized_call(PublicSuffix.domain(artifact.domain)) + artifact.whois_record ||= memoized_call(PublicSuffix.domain(artifact.domain)).dup end private diff --git a/lib/mihari/rule.rb b/lib/mihari/rule.rb index 31a11984c..df3bba72e 100644 --- a/lib/mihari/rule.rb +++ b/lib/mihari/rule.rb @@ -174,11 +174,10 @@ def unique_artifacts # @return [Array] # def enriched_artifacts - # TODO: same whois query can be issued multiple times - @enriched_artifacts ||= Parallel.map(unique_artifacts) do |artifact| - enrichers.each do |enricher| - enricher.result(artifact) if enricher.callable?(artifact) - end + @enriched_artifacts ||= unique_artifacts.map do |artifact| + serial_enrichers.each { |enricher| enricher.result(artifact) } + Parallel.each(parallel_enrichers) { |enricher| enricher.result(artifact) } + artifact end end @@ -191,7 +190,10 @@ def enriched_artifacts def bulk_emit return [] if enriched_artifacts.empty? - Parallel.map(emitters) { |emitter| emitter.result(enriched_artifacts).value_or nil }.compact + [].tap do |out| + out << serial_emitters.map { |emitter| emitter.result(enriched_artifacts).value_or(nil) } + out << Parallel.map(parallel_emitters) { |emitter| emitter.result(enriched_artifacts).value_or(nil) } + end.flatten.compact end # @@ -368,6 +370,14 @@ def emitters end end + def parallel_emitters + emitters.select(&:parallel?) + end + + def serial_emitters + emitters.reject(&:parallel?) + end + # # Get enricher class # @@ -394,6 +404,14 @@ def enrichers end end + def parallel_enrichers + enrichers.select(&:parallel?) + end + + def serial_enrichers + enrichers.reject(&:parallel?) + end + # # Validate the data format # diff --git a/lib/mihari/schemas/options.rb b/lib/mihari/schemas/options.rb index cf50bc5be..6849a5500 100644 --- a/lib/mihari/schemas/options.rb +++ b/lib/mihari/schemas/options.rb @@ -7,17 +7,14 @@ module Schemas optional(:retry_interval).value(:integer).default(Mihari.config.retry_interval) optional(:retry_exponential_backoff).value(:bool).default(Mihari.config.retry_exponential_backoff) optional(:timeout).value(:integer) + optional(:parallel).value(:bool).default(Mihari.config.parallel) end IgnoreErrorOptions = Dry::Schema.Params do optional(:ignore_error).value(:bool).default(Mihari.config.ignore_error) end - ParallelOptions = Dry::Schema.Params do - optional(:parallel).value(:bool).default(Mihari.config.parallel) - end - - AnalyzerOptions = Options | IgnoreErrorOptions | ParallelOptions + AnalyzerOptions = Options | IgnoreErrorOptions PaginationOptions = Dry::Schema.Params do optional(:pagination_interval).value(:integer).default(Mihari.config.pagination_interval) diff --git a/spec/commands/search_spec.rb b/spec/commands/search_spec.rb index a296781f4..fea01163c 100644 --- a/spec/commands/search_spec.rb +++ b/spec/commands/search_spec.rb @@ -13,7 +13,6 @@ class SearchCLI < Mihari::CLI::Base before do allow(rule).to receive(:enrichers).and_return([]) - allow(Parallel).to receive(:processor_count).and_return(0) end describe "#search" do diff --git a/spec/rule_spec.rb b/spec/rule_spec.rb index b62c5ced9..05dacc109 100644 --- a/spec/rule_spec.rb +++ b/spec/rule_spec.rb @@ -39,10 +39,6 @@ [artifact, artifact] end - before do - allow(Parallel).to receive(:processor_count).and_return(0) - end - describe "#model" do it "returns a model" do expect(rule.model).to be_a Mihari::Models::Rule From 98ee6e81a8534e305a566c2f27d3b329b30eccac Mon Sep 17 00:00:00 2001 From: Manabu Niseki Date: Sat, 20 Jan 2024 20:00:52 +0900 Subject: [PATCH 2/3] fix: fix Artifact#domain --- lib/mihari/enrichers/google_public_dns.rb | 2 ++ lib/mihari/enrichers/whois.rb | 34 +++++++++++------------ lib/mihari/models/artifact.rb | 3 +- 3 files changed, 21 insertions(+), 18 deletions(-) diff --git a/lib/mihari/enrichers/google_public_dns.rb b/lib/mihari/enrichers/google_public_dns.rb index 9a1f54357..22efad9bf 100644 --- a/lib/mihari/enrichers/google_public_dns.rb +++ b/lib/mihari/enrichers/google_public_dns.rb @@ -12,6 +12,8 @@ class GooglePublicDNS < Base # @return [Mihari::Models::Artifact] # def call(artifact) + return if artifact.domain.nil? + res = client.query_all(artifact.domain) artifact.tap do |tapped| diff --git a/lib/mihari/enrichers/whois.rb b/lib/mihari/enrichers/whois.rb index a1ee7236a..3122ffc67 100644 --- a/lib/mihari/enrichers/whois.rb +++ b/lib/mihari/enrichers/whois.rb @@ -15,10 +15,21 @@ class Whois < Base # # @param [Mihari::Models::Artifact] artifact # - # @return [Mihari::Models::WhoisRecord, nil] - # def call(artifact) - artifact.whois_record ||= memoized_call(PublicSuffix.domain(artifact.domain)).dup + return if artifact.domain.nil? + + domain = PublicSuffix.domain(artifact.domain) + record = memoized_lookup(domain) + return if record.parser.available? + + artifact.whois_record ||= Models::WhoisRecord.new( + domain: domain, + created_on: get_created_on(record.parser), + updated_on: get_updated_on(record.parser), + expires_on: get_expires_on(record.parser), + registrar: get_registrar(record.parser), + contacts: get_contacts(record.parser) + ) end private @@ -41,21 +52,10 @@ def supported_data_types # # @return [Mihari::Models::WhoisRecord, nil] # - def memoized_call(domain) - record = whois.lookup(domain) - parser = record.parser - return nil if parser.available? - - Models::WhoisRecord.new( - domain: domain, - created_on: get_created_on(parser), - updated_on: get_updated_on(parser), - expires_on: get_expires_on(parser), - registrar: get_registrar(parser), - contacts: get_contacts(parser) - ) + def memoized_lookup(domain) + whois.lookup domain end - memo_wise :memoized_call + memo_wise :memoized_lookup # # @return [::Whois::Client] diff --git a/lib/mihari/models/artifact.rb b/lib/mihari/models/artifact.rb index 3b190e8dc..bce0a55a2 100644 --- a/lib/mihari/models/artifact.rb +++ b/lib/mihari/models/artifact.rb @@ -190,7 +190,8 @@ def domain when "domain" data when "url" - Addressable::URI.parse(data).host + host = Addressable::URI.parse(data).host + (DataType.type(host) == "ip") ? nil : host end end From f8d0f71f2f8c399c9510fa2974e3f4a188ea4091 Mon Sep 17 00:00:00 2001 From: Manabu Niseki Date: Sat, 20 Jan 2024 20:08:51 +0900 Subject: [PATCH 3/3] docs: update parallel description --- docs/analyzers/index.md | 2 +- docs/emitters/index.md | 5 +++++ docs/enrichers/index.md | 5 +++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/analyzers/index.md b/docs/analyzers/index.md index 94d0dae19..4f6e873b5 100644 --- a/docs/analyzers/index.md +++ b/docs/analyzers/index.md @@ -95,7 +95,7 @@ queries: ### Parallel -`parallel` (`bool`) controls whether to do the parallel query execution or not. Optional. Defaults to `false`. Configurable via `PARALLEL` environment variable. +`parallel` (`bool`) controls whether to allow parallel execution or not. Optional. Defaults to `false`. Configurable via `PARALLEL` environment variable. ### Pagination Interval diff --git a/docs/emitters/index.md b/docs/emitters/index.md index 324951e9a..545f8e4b0 100644 --- a/docs/emitters/index.md +++ b/docs/emitters/index.md @@ -17,6 +17,7 @@ options: retry_times: ... retry_interval: ... retry_exponential_backoff: ... + parallel: ... ``` ### Timeout @@ -34,3 +35,7 @@ options: ### Retry Exponential Backoff `retry_exponential_backoff` (`bool`) controls whether to do exponential backoff. Optional. Defaults to `true`. Configurable via `RETRY_EXPONENTIAL_BACKOFF` environment variable. + +### Parallel + +`parallel` (`bool`) controls whether to allow parallel execution or not. Optional. Defaults to `false`. Configurable via `PARALLEL` environment variable. diff --git a/docs/enrichers/index.md b/docs/enrichers/index.md index b7f273683..dea9e5c74 100644 --- a/docs/enrichers/index.md +++ b/docs/enrichers/index.md @@ -16,6 +16,7 @@ options: retry_times: ... retry_interval: ... retry_exponential_backoff: ... + parallel: ... ``` ### Timeout @@ -33,3 +34,7 @@ options: ### Retry Exponential Backoff `retry_exponential_backoff` (`bool`) controls whether to do exponential backoff. Optional. Defaults to `true`. Configurable via `RETRY_EXPONENTIAL_BACKOFF` environment variable. + +### Parallel + +`parallel` (`bool`) controls whether to allow parallel execution or not. Optional. Defaults to `false`. Configurable via `PARALLEL` environment variable.