Resolved comments 6 PR#44: modifying integration test to increase coverage, other changes to satisfy mypy
David-OC17 committed Nov 23, 2024
1 parent 7c3b3f9 commit 8de0914
Showing 3 changed files with 28 additions and 8 deletions.
26 changes: 22 additions & 4 deletions sonar/inference_pipelines/mutox_speech.py
@@ -11,9 +11,12 @@
 from fairseq2.typing import Device
 
 from sonar.inference_pipelines.speech import (
+    AudioToFbankDataPipelineBuilder,
     SpeechInferenceParams,
+    SpeechInferencePipeline,
     SpeechToEmbeddingPipeline,
 )
+from sonar.inference_pipelines.utils import extract_sequence_batch
 from sonar.models.encoder_model import SonarEncoderModel
 from sonar.models.mutox.classifier import MutoxClassifier
 from sonar.models.mutox.loader import load_mutox_model
@@ -22,7 +25,9 @@
 CPU_DEVICE = torch.device("cpu")
 
 
-class MutoxSpeechClassifierPipeline(SpeechToEmbeddingPipeline):
+class MutoxSpeechClassifierPipeline(SpeechInferencePipeline):
+    model: SonarEncoderModel
+
     def __init__(
         self,
         mutox_classifier: Union[str, MutoxClassifier],
@@ -36,7 +41,7 @@ def __init__(
         else:
             self.model = encoder
 
-        super().__init__(self.model)
+        super().__init__()
 
         self.model.to(device).eval()
 
@@ -56,17 +61,30 @@ def load_model_from_name(
         mutox_classifier_name: str,
         encoder_name: str,
         device: Device = CPU_DEVICE,
-    ) -> "SpeechToEmbeddingPipeline":
+    ) -> "MutoxSpeechClassifierPipeline":
         encoder = load_sonar_speech_model(encoder_name, device=device, progress=False)
         mutox_classifier = load_mutox_model(
             mutox_classifier_name, device=device, progress=False
         )
         return cls(mutox_classifier=mutox_classifier, encoder=encoder, device=device)
 
     def prebuild_pipeline(self, context: SpeechInferenceParams) -> DataPipelineBuilder:
-        pipeline_builder = super().prebuild_pipeline(context)
+        audio_to_fbank_dp_builder = AudioToFbankDataPipelineBuilder()
+        pipeline_builder = (
+            audio_to_fbank_dp_builder.prebuild_pipeline(context)
+            .map(
+                lambda fbank: extract_sequence_batch(fbank, context.device),
+                selector="audio.data.fbank",
+            )
+            .map(self.run_inference, selector="audio.data")
+        )
         return pipeline_builder.map(self._run_classifier, selector="audio.data")
 
+    @torch.inference_mode()
+    def run_inference(self, fbank: torch.Tensor) -> dict:
+        """Runs the encoder model on the extracted FBANK features."""
+        return {"sentence_embeddings": self.model(fbank)}
+
     @torch.inference_mode()
     def _run_classifier(self, data: dict):
         sentence_embeddings = data.get("sentence_embeddings")
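Note: the rewritten prebuild_pipeline composes fbank extraction and the encoder call through fairseq2's selector-based map, where each .map(fn, selector=...) applies fn only to the nested field named by the selector (here "audio.data.fbank", then "audio.data") and leaves the rest of each example untouched. A minimal, runnable sketch of that mechanism, using toy dictionaries rather than the real audio pipeline:

from fairseq2.data import read_sequence

# Hypothetical examples shaped like the pipeline's nested dicts.
examples = [
    {"audio": {"data": {"fbank": 1.0}}},
    {"audio": {"data": {"fbank": 2.0}}},
]

pipeline = (
    read_sequence(examples)
    # Only the "fbank" leaf is rewritten; sibling fields are left as-is.
    .map(lambda fbank: fbank * 10, selector="audio.data.fbank")
    .and_return()
)

for example in pipeline:
    print(example["audio"]["data"]["fbank"])  # 10.0, then 20.0

In the class above, the first map replaces the fbank tensor with a batched sequence via extract_sequence_batch, and the second replaces the whole "audio.data" entry with the embedding dict returned by run_inference, which _run_classifier then consumes.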
8 changes: 5 additions & 3 deletions tests/integration_tests/test_mutox.py
@@ -76,9 +76,11 @@ def test_sonar_mutox_classifier_integration(input_texts, source_lang, expected_o
             [0.0],
         ),
         (
-            ["She worked hard and made a significant contribution to the team."],
+            [
+                "Dammit, that was a terrible launch, it will piss the director and make the mission fail."
+            ],
             "eng_Latn",
-            [0.0],
+            [0.23],
         ),
         (
             [
@@ -117,7 +119,7 @@ def test_sonar_mutox_classifier_probability_integration(
 
     prob = classifier(emb.to(device).to(dtype), output_prob=True)
 
-    assert abs(prob.item() - expected_prob) < 0.001, (
+    assert abs(prob.item() - expected_prob) < 0.01, (
         f"Expected probability {expected_prob}, but got {prob.item()}. "
         "Output probability should be within a reasonable range."
     )
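Note: the relaxed absolute tolerance (0.01 instead of 0.001) presumably makes the probability check less brittle across devices and dtypes. The same check could be written with pytest.approx; a small sketch, with an illustrative helper name and the 0.01 bound taken from the diff:

import pytest

def assert_probability_close(prob: float, expected_prob: float) -> None:
    # Absolute tolerance of 0.01, matching the relaxed assertion above.
    assert prob == pytest.approx(expected_prob, abs=0.01)

assert_probability_close(0.234, 0.23)  # passes: |0.234 - 0.23| <= 0.01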
2 changes: 1 addition & 1 deletion tests/unit_tests/huggingface_pipelines/text.py
@@ -52,7 +52,7 @@ def test_embedding_to_text_process_batch(embedding_to_text_config):
     embedding_dim = 1024
     num_embeddings = 4
 
-    embeddings = [
+    embeddings: List[np.ndarray] = [
         np.random.rand(embedding_dim).astype(np.float32) for _ in range(num_embeddings)
     ]
 
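Note: the explicit List[np.ndarray] annotation is what keeps mypy happy here; it assumes List is imported from typing in this test module (on Python 3.9+ the builtin list[np.ndarray] form would also work). A standalone sketch of the pattern, with illustrative sizes:

from typing import List

import numpy as np

# Explicit element type for the comprehension, mirroring the change above.
embeddings: List[np.ndarray] = [
    np.random.rand(1024).astype(np.float32) for _ in range(4)
]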
