From 136c9c5f81f6a8b12c66e2edb6c22f5307c56c34 Mon Sep 17 00:00:00 2001
From: Manabu Niseki
Date: Sat, 10 Aug 2024 15:39:41 +0900
Subject: [PATCH] feat: improve enrichment parallelism (#1108)

---
Review notes (free text placed between the "---" separator and the first
"diff --git" line; it is not part of the commit message or of any hunk):

* lib/mihari/enrichers/whois.rb
    Whois#call now wraps the assignment in `artifact.tap`, so it returns the
    artifact it was given instead of the value of the `||=` expression. It
    still returns nil early when `artifact.domain` is nil.
* lib/mihari/models/artifact.rb
    Requires "ostruct". Artifact#enrich now delegates to the new
    #enrich_by_enrichers, which builds an OpenStruct copy of the record with
    the new #struct helper, runs every enricher against that copy through
    `Parallel.map`, and then assigns the merged results back onto the record:
    dns_records, cpes, ports and vulnerabilities are flattened and compacted;
    autonomous_system, geolocation and whois_record take the first non-nil
    value. The method returns self.
* lib/mihari/rule.rb
    Rule#enriched_artifacts calls `artifact.enrich_by_enrichers enrichers`
    instead of applying each enricher to the artifact serially.

* NOTE(review): #struct initialises `reverse_dns_names` and `tags`, but
  #enrich_by_enrichers does not copy either of them back onto the record.
  Confirm whether any enricher writes to those fields; if one does, its
  output is dropped.
* NOTE(review): the YARD block of #enrich_by_enrichers originally documented a
  `parallel` parameter that the signature does not have. The comment is
  corrected in the hunk below without changing the number of added lines, so
  hunk headers and the diffstat are unchanged; only the post-image blob id on
  the artifact.rb "index" line no longer corresponds exactly.

 lib/mihari/enrichers/whois.rb |  4 ++-
 lib/mihari/models/artifact.rb | 48 ++++++++++++++++++++++++++++++-----
 lib/mihari/rule.rb            |  5 +---
 3 files changed, 46 insertions(+), 11 deletions(-)

diff --git a/lib/mihari/enrichers/whois.rb b/lib/mihari/enrichers/whois.rb
index f00678ad..9ef59c39 100644
--- a/lib/mihari/enrichers/whois.rb
+++ b/lib/mihari/enrichers/whois.rb
@@ -16,7 +16,9 @@ class Whois < Base
       def call(artifact)
         return if artifact.domain.nil?
 
-        artifact.whois_record ||= memoized_lookup(PublicSuffix.domain(artifact.domain))
+        artifact.tap do |tapped|
+          tapped.whois_record ||= memoized_lookup(PublicSuffix.domain(artifact.domain))
+        end
       end
 
       private
diff --git a/lib/mihari/models/artifact.rb b/lib/mihari/models/artifact.rb
index 82fd642c..c3989756 100644
--- a/lib/mihari/models/artifact.rb
+++ b/lib/mihari/models/artifact.rb
@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require "ostruct"
+
 module Mihari
   module Models
     #
@@ -158,10 +160,7 @@ class Artifact < ActiveRecord::Base
       # @return [Boolean] true if it is unique. Otherwise false.
       #
       def unique?(base_time: nil, artifact_ttl: nil)
-        artifact = self.class.joins(:alert).where(
-          data:,
-          alert: {rule_id:}
-        ).order(created_at: :desc).first
+        artifact = self.class.joins(:alert).where(data:, alert: {rule_id:}).order(created_at: :desc).first
         return true if artifact.nil?
 
         # check whether the artifact is decayed or not
@@ -179,7 +178,32 @@ def enrichable?
       end
 
       def enrich
-        callable_enrichers.each { |enricher| enricher.result self }
+        enrich_by_enrichers callable_enrichers
+      end
+
+      #
+      # @param [Array] enrichers enrichers to run in parallel; each one receives an
+      #   OpenStruct copy of this artifact (built by #struct), not the record itself
+      #
+      # @return [Mihari::Models::Artifact] self, with the merged enricher results assigned
+      #
+      def enrich_by_enrichers(enrichers)
+        # NOTE: doing parallel with ActiveRecord objects is troublesome (e.g. connection issue, etc.)
+        #       so converting the object to an OpenStruct object
+        s = struct
+        results = Parallel.map(enrichers) { |enricher| enricher.result s }
+        enriched = results.compact.map { |result| result.value_or(nil) }.compact
+
+        self.dns_records = enriched.map(&:dns_records).flatten.compact
+        self.cpes = enriched.map(&:cpes).flatten.compact
+        self.ports = enriched.map(&:ports).flatten.compact
+        self.vulnerabilities = enriched.map(&:vulnerabilities).flatten.compact
+
+        self.autonomous_system = enriched.map(&:autonomous_system).compact.first
+        self.geolocation = enriched.map(&:geolocation).compact.first
+        self.whois_record = enriched.map(&:whois_record).compact.first
+
+        self
       end
 
       #
@@ -195,6 +219,18 @@ def domain
         end
       end
 
+      def struct
+        OpenStruct.new(attributes).tap do |s|
+          s.domain = domain
+          s.cpes ||= []
+          s.dns_records ||= []
+          s.ports ||= []
+          s.reverse_dns_names ||= []
+          s.vulnerabilities ||= []
+          s.tags ||= []
+        end
+      end
+
       class << self
         # @!method search_by_filter(filter)
         #   @param [Mihari::Structs::Filters::Search] filter
@@ -212,7 +248,7 @@ class << self
       #
       def callable_enrichers
         @callable_enrichers ||= Mihari.enrichers.map(&:new).select do |enricher|
-          enricher.callable?(self)
+          enricher.callable? self
         end
       end
 
diff --git a/lib/mihari/rule.rb b/lib/mihari/rule.rb
index db0b748a..647c7710 100644
--- a/lib/mihari/rule.rb
+++ b/lib/mihari/rule.rb
@@ -178,10 +178,7 @@ def unique_artifacts
     #
     def enriched_artifacts
       @enriched_artifacts ||= Parallel.map(unique_artifacts) do |artifact|
-        artifact.tap do |tapped|
-          # NOTE: To apply changes correctly, enrichers should be applied to an artifact serially
-          enrichers.each { |enricher| enricher.result(tapped) }
-        end
+        artifact.enrich_by_enrichers enrichers
       end
     end
 