From 31b7a07899344f8dcf537bde94d9600da11e359e Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 14:04:39 -0400 Subject: [PATCH 01/18] add async job components --- .../sources/declarative/async_job/__init__.py | 0 .../sources/declarative/async_job/job.py | 45 ++++ .../declarative/async_job/job_orchestrator.py | 238 ++++++++++++++++++ .../declarative/async_job/repository.py | 23 ++ .../sources/declarative/async_job/status.py | 49 ++++ .../sources/declarative/async_job/timer.py | 39 +++ .../declarative_component_schema.yaml | 105 ++++++++ .../declarative/extractors/__init__.py | 3 +- .../declarative/extractors/record_selector.py | 21 +- .../extractors/response_to_file_extractor.py | 162 ++++++++++++ .../models/declarative_component_schema.py | 70 +++++- .../parsers/model_to_component_factory.py | 68 ++++- .../requesters/http_job_repository.py | 163 ++++++++++++ .../declarative/retrievers/__init__.py | 3 +- .../declarative/retrievers/async_retriever.py | 140 +++++++++++ .../declarative/transformations/add_fields.py | 10 +- .../keys_to_lower_transformation.py | 30 +++ .../transformations/remove_fields.py | 6 +- .../transformations/transformation.py | 10 +- .../sources/declarative/job/__init__.py | 0 .../declarative/job/test_integration.py | 115 +++++++++ .../declarative/job/test_job_orchestrator.py | 146 +++++++++++ .../requesters/test_http_job_repository.py | 180 +++++++++++++ .../transformations/test_add_fields.py | 3 +- .../test_keys_to_lower_transformation.py | 13 + .../transformations/test_remove_fields.py | 3 +- 26 files changed, 1622 insertions(+), 23 deletions(-) create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/__init__.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/timer.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py create mode 100644 airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/job/__init__.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_job_repository.py create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py new file mode 100644 index 000000000000..3f2285f27494 --- /dev/null +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py @@ -0,0 +1,45 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + + +from datetime import timedelta +from typing import Optional + +from airbyte_cdk import StreamSlice +from airbyte_cdk.sources.declarative.async_job.timer import Timer + +from .status import AsyncJobStatus + + +class AsyncJob: + """ + Note that the timer will only stop once `update_status` is called, so the job might be completed on the API side, but until we query for + it and call `AsyncJob.update_status`, `AsyncJob.status` will not reflect the actual API-side status. + """ + + def __init__(self, api_job_id: str, job_parameters: StreamSlice, timeout: Optional[timedelta] = None) -> None: + self._api_job_id = api_job_id + self._job_parameters = job_parameters + self._status = AsyncJobStatus.RUNNING + + timeout = timeout if timeout else timedelta(minutes=60) + self._timer = Timer(timeout) + self._timer.start() + + def api_job_id(self) -> str: + return self._api_job_id + + def status(self) -> AsyncJobStatus: + if self._timer.has_timed_out(): + return AsyncJobStatus.TIMED_OUT + return self._status + + def job_parameters(self) -> StreamSlice: + return self._job_parameters + + def update_status(self, status: AsyncJobStatus) -> None: + if self._status != AsyncJobStatus.RUNNING and status == AsyncJobStatus.RUNNING: + self._timer.start() + elif status in [AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT, AsyncJobStatus.COMPLETED]: + self._timer.stop() + + self._status = status diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py new file mode 100644 index 000000000000..098800bcab57 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py @@ -0,0 +1,238 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+ +import logging +import time +from typing import Any, Generator, Iterable, List, Mapping, Optional, Set + +from airbyte_cdk import StreamSlice +from airbyte_cdk.models import FailureType +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus +from airbyte_cdk.utils.traced_exception import AirbyteTracedException + +LOGGER = logging.getLogger("airbyte") + + +class AsyncPartition: + """ + This bucket of api_jobs is a bit useless for this iteration but should become interesting when we will be able to split jobs + """ + + _MAX_NUMBER_OF_ATTEMPTS = 3 + + def __init__(self, jobs: List[AsyncJob], stream_slice: StreamSlice) -> None: + self._attempts_per_job = {job: 0 for job in jobs} + self._stream_slice = stream_slice + + def has_reached_max_attempt(self) -> bool: + return any(map(lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS, self._attempts_per_job.values())) + + def replace_job(self, job_to_replace: AsyncJob, new_jobs: List[AsyncJob]) -> None: + current_attempt_count = self._attempts_per_job.pop(job_to_replace, None) + if current_attempt_count is None: + raise ValueError("Could not find job to replace") + elif current_attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS: + raise ValueError(f"Max attempt reached for job in partition {self._stream_slice}") + + new_attempt_count = current_attempt_count + 1 + for job in new_jobs: + self._attempts_per_job[job] = new_attempt_count + + def should_split(self, job: AsyncJob) -> bool: + """ + Not used right now but once we support job split, we should split based on the number of attempts + """ + return False + + @property + def jobs(self) -> Iterable[AsyncJob]: + return self._attempts_per_job.keys() + + @property + def stream_slice(self) -> StreamSlice: + return self._stream_slice + + @property + def status(self) -> AsyncJobStatus: + """ + Given different job statuses, the priority is: FAILED, TIMED_OUT, RUNNING. Else, it means everything is completed. + """ + statuses = set(map(lambda job: job.status(), self.jobs)) + if statuses == {AsyncJobStatus.COMPLETED}: + return AsyncJobStatus.COMPLETED + elif AsyncJobStatus.FAILED in statuses: + return AsyncJobStatus.FAILED + elif AsyncJobStatus.TIMED_OUT in statuses: + return AsyncJobStatus.TIMED_OUT + else: + return AsyncJobStatus.RUNNING + + # TODO def __repr__(self) -> str: for slice printing + + +class AsyncJobOrchestrator: + _WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS = 5 + + def __init__( + self, + job_repository: AsyncJobRepository, + slices: Iterable[StreamSlice], + number_of_retries: Optional[int] = None, + ) -> None: + self._job_repository: AsyncJobRepository = job_repository + self._slice_iterator = iter(slices) + self._running_partitions: List[AsyncPartition] = [] + + def _replace_failed_jobs(self, partition: AsyncPartition) -> None: + failed_status_jobs = (AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT) + jobs_to_replace = [job for job in partition.jobs if job.status() in failed_status_jobs] + for job in jobs_to_replace: + new_job = self._job_repository.start(job.job_parameters()) + partition.replace_job(job, [new_job]) + + def _start_jobs(self) -> None: + """ + Start the jobs for each slice in the slice iterator. + This method iterates over the slice iterator and starts a job for each slice. + The started jobs are added to the running partitions. 
+ Returns: + None + + TODO Eventually, we need to cap the number of concurrent jobs. + However, the first iteration is for sendgrid which only has one job. + """ + for partition in self._running_partitions: + self._replace_failed_jobs(partition) + + for _slice in self._slice_iterator: + job = self._job_repository.start(_slice) + self._running_partitions.append(AsyncPartition([job], _slice)) + + def _get_running_jobs(self) -> Set[AsyncJob]: + """ + Returns a set of running AsyncJob objects. + + Returns: + Set[AsyncJob]: A set of AsyncJob objects that are currently running. + """ + return {job for partition in self._running_partitions for job in partition.jobs if job.status() == AsyncJobStatus.RUNNING} + + def _update_jobs_status(self) -> None: + """ + Update the status of all running jobs in the repository. + """ + running_jobs = self._get_running_jobs() + if running_jobs: + # update the status only if there are RUNNING jobs + self._job_repository.update_jobs_status(running_jobs) + + def _wait_on_status_update(self) -> None: + """ + Waits for a specified amount of time between status updates. + + + This method is used to introduce a delay between status updates in order to avoid excessive polling. + The duration of the delay is determined by the value of `_WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS`. + + Returns: + None + """ + self._debug_log(f"Polling status in progress. There are currently {len(self._running_partitions)} running partitions.") + + # wait only when there are running partitions + if self._running_partitions: + self._debug_log(f"Waiting for {self._WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS} seconds before next poll...") + time.sleep(self._WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS) + + def _debug_log(self, message: str) -> None: + # if statement in order to avoid string formatting if we're not in debug mode + if LOGGER.isEnabledFor(logging.DEBUG): + LOGGER.debug(message) + + def _process_completed_partition(self, partition: AsyncPartition) -> None: + """ + Process a completed partition. + Args: + partition (AsyncPartition): The completed partition to process. + Returns: + AsyncPartition: The processed partition. + """ + job_ids = list(map(lambda job: job.api_job_id(), {job for job in partition.jobs})) + LOGGER.info(f"The following jobs for stream slice {partition.stream_slice} have been completed: {job_ids}.") + + def _process_running_partitions_and_yield_completed_ones(self) -> Generator[AsyncPartition, Any, None]: + """ + Process the running partitions. + + Yields: + AsyncPartition: The processed partition. + + Raises: + Any: Any exception raised during processing. + """ + current_running_partitions: List[AsyncPartition] = [] + for partition in self._running_partitions: + match partition.status: + case AsyncJobStatus.COMPLETED: + self._process_completed_partition(partition) + yield partition + case AsyncJobStatus.RUNNING: + current_running_partitions.append(partition) + case _ if partition.has_reached_max_attempt(): + self._process_partitions_with_errors(partition) + case _: + # job will be restarted in `_start_job` + current_running_partitions.insert(0, partition) + # update the referenced list with running partitions + self._running_partitions = current_running_partitions + + def _process_partitions_with_errors(self, partition: AsyncPartition) -> None: + """ + Process a partition with status errors (FAILED and TIMEOUT). + + Args: + partition (AsyncPartition): The partition to process. 
+ Raises: + AirbyteTracedException: If at least one job could not be completed. + """ + status_by_job_id = {job.api_job_id(): job.status() for job in partition.jobs} + raise AirbyteTracedException( + message=f"At least one job could not be completed. Job statuses were: {status_by_job_id}", + failure_type=FailureType.system_error, + ) + + def create_and_get_completed_partitions(self) -> Iterable[AsyncPartition]: + """ + Creates and retrieves completed partitions. + This method continuously starts jobs, updates job status, processes running partitions, + logs the partitions being polled, and waits for status updates. It yields completed partitions + as they become available. + + Returns: + An iterable of completed partitions, represented as AsyncPartition objects. + """ + while True: + self._start_jobs() + if not self._running_partitions: + break + + self._update_jobs_status() + yield from self._process_running_partitions_and_yield_completed_ones() + self._wait_on_status_update() + + def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]: + """ + Fetches records from the given partition's jobs. + + Args: + partition (AsyncPartition): The partition containing the jobs. + + Yields: + Iterable[Mapping[str, Any]]: The fetched records from the jobs. + """ + for job in partition.jobs: + yield from self._job_repository.fetch_records(job) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py new file mode 100644 index 000000000000..e536eeae039c --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py @@ -0,0 +1,23 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + +from abc import abstractmethod +from dataclasses import dataclass +from typing import Any, Iterable, Mapping, Set + +from airbyte_cdk import StreamSlice +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob + + +@dataclass +class AsyncJobRepository: + @abstractmethod + def start(self, stream_slice: StreamSlice) -> AsyncJob: + pass + + @abstractmethod + def update_jobs_status(self, jobs: Set[AsyncJob]) -> None: + pass + + @abstractmethod + def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: + pass diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py new file mode 100644 index 000000000000..175dc6b78a66 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py @@ -0,0 +1,49 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + + +from dataclasses import InitVar, dataclass +from enum import Enum +from typing import Any, List, Mapping + +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncJobStatusMap as AsyncJobStatusMapModel + + +class AsyncJobStatus(Enum): + RUNNING = "RUNNING" + COMPLETED = "COMPLETED" + FAILED = "FAILED" + TIMED_OUT = "TIMED_OUT" + + +@dataclass +class AsyncJobStatusMap: + model: AsyncJobStatusMapModel + parameters: InitVar[Mapping[str, Any]] + + def parse_input(self) -> Mapping[str, AsyncJobStatus]: + """ + Parses the input and returns a mapping of status values.
+ + Returns: + A mapping of status values where the keys are the input values and the values are the corresponding AsyncJobStatus. + + Example: + { + 'running': AsyncJobStatus.RUNNING, + 'completed': AsyncJobStatus.COMPLETED, + 'failed': AsyncJobStatus.FAILED, + 'timeout': AsyncJobStatus.TIMED_OUT, + } + """ + status_mapping = {} + for key, value in self.model.dict().items(): + match key: + case "running": + status_mapping[value] = AsyncJobStatus.RUNNING + case "completed": + status_mapping[value] = AsyncJobStatus.COMPLETED + case "failed": + status_mapping[value] = AsyncJobStatus.FAILED + case "timeout": + status_mapping[value] = AsyncJobStatus.TIMED_OUT + return status_mapping diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/timer.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/timer.py new file mode 100644 index 000000000000..c4e5a9a1d85a --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/timer.py @@ -0,0 +1,39 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. +from datetime import datetime, timedelta, timezone +from typing import Optional + + +class Timer: + def __init__(self, timeout: timedelta) -> None: + self._start_datetime: Optional[datetime] = None + self._end_datetime: Optional[datetime] = None + self._timeout = timeout + + def start(self) -> None: + self._start_datetime = self._now() + self._end_datetime = None + + def stop(self) -> None: + if self._end_datetime is None: + self._end_datetime = self._now() + + def is_started(self) -> bool: + return self._start_datetime is not None + + @property + def elapsed_time(self) -> Optional[timedelta]: + if not self._start_datetime: + return None + + end_time = self._end_datetime or self._now() + elapsed_period = end_time - self._start_datetime + return elapsed_period + + def has_timed_out(self) -> bool: + if not self.is_started(): + return False + return self.elapsed_time > self._timeout # type: ignore # given the job timer is started, we assume there is an elapsed_period + + @staticmethod + def _now() -> datetime: + return datetime.now(tz=timezone.utc) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index bfcf1f29774e..eb7fd3213e66 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1151,6 +1151,7 @@ definitions: title: Retriever description: Component used to coordinate how records are extracted across stream slices and request pages. anyOf: + - "$ref": "#/definitions/AsyncRetriever" - "$ref": "#/definitions/CustomRetriever" - "$ref": "#/definitions/SimpleRetriever" incremental_sync: @@ -1189,6 +1190,7 @@ definitions: - "$ref": "#/definitions/AddFields" - "$ref": "#/definitions/CustomTransformation" - "$ref": "#/definitions/RemoveFields" + - "$ref": "#/definitions/KeysToLower" state_migrations: title: State Migrations description: Array of state migrations to be applied on the input state @@ -1685,6 +1687,19 @@ definitions: type: type: string enum: [JsonlDecoder] + KeysToLower: + title: Keys to Lower Fields + description: A transformation that renames all keys to lower case. 
+ type: object + required: + - type + properties: + type: + type: string + enum: [KeysToLower] + $parameters: + type: object + additionalProperties: true IterableDecoder: title: Iterable Decoder description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key. @@ -2341,6 +2356,96 @@ definitions: $parameters: type: object additionalProperties: true + AsyncJobStatusMap: + description: Matches the api job status to Async Job Status. + type: object + required: + - running + - completed + - failed + - timeout + properties: + type: + type: string + enum: [AsyncJobStatusMap] + running: + type: string + completed: + type: string + failed: + type: string + timeout: + type: string + AsyncRetriever: + description: Retrieves records by Asynchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router. + type: object + required: + - type + - record_selector + - status_mapping + - creation_requester + - polling_requester + - download_requester + properties: + type: + type: string + enum: [AsyncRetriever] + record_selector: + description: Component that describes how to extract records from a HTTP response. + "$ref": "#/definitions/RecordSelector" + status_mapping: + description: Async Job Status to Airbyte CDK Async Job Status mapping. + anyOf: + - "$ref": "#/definitions/AsyncJobStatusMap" + status_extractor: + description: Responsible for fetching the actual status of the async job. + anyOf: + - "$ref": "#/definitions/CustomRecordExtractor" + - "$ref": "#/definitions/DpathExtractor" + urls_extractor: + description: Responsible for fetching the final result `urls` provided by the completed / finished / ready async job. + anyOf: + - "$ref": "#/definitions/CustomRecordExtractor" + - "$ref": "#/definitions/DpathExtractor" + creation_requester: + description: Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job. + anyOf: + - "$ref": "#/definitions/CustomRequester" + - "$ref": "#/definitions/HttpRequester" + polling_requester: + description: Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job. + anyOf: + - "$ref": "#/definitions/CustomRequester" + - "$ref": "#/definitions/HttpRequester" + download_requester: + description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job. + anyOf: + - "$ref": "#/definitions/CustomRequester" + - "$ref": "#/definitions/HttpRequester" + partition_router: + title: Partition Router + description: PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing. + default: [] + anyOf: + - "$ref": "#/definitions/CustomPartitionRouter" + - "$ref": "#/definitions/ListPartitionRouter" + - "$ref": "#/definitions/SubstreamPartitionRouter" + - type: array + items: + anyOf: + - "$ref": "#/definitions/CustomPartitionRouter" + - "$ref": "#/definitions/ListPartitionRouter" + - "$ref": "#/definitions/SubstreamPartitionRouter" + decoder: + title: Decoder + description: Component decoding the response so records can be extracted. 
+ anyOf: + - "$ref": "#/definitions/JsonDecoder" + - "$ref": "#/definitions/JsonlDecoder" + - "$ref": "#/definitions/IterableDecoder" + $parameters: + type: object + additionalProperties: true Spec: title: Spec description: A source specification made up of connector metadata and how it can be configured. diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py index 5c361598d351..76304b467f43 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py @@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector +from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import ResponseToFileExtractor -__all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector"] +__all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector", "ResponseToFileExtractor"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py index 6f9cc4047838..eed33d858228 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py @@ -61,6 +61,24 @@ def select_records( :return: List of Records selected from the response """ all_data: Iterable[Mapping[str, Any]] = self.extractor.extract_records(response) + yield from self.filter_and_transform(all_data, stream_state, records_schema, stream_slice, next_page_token) + + def filter_and_transform( + self, + all_data: Iterable[Mapping[str, Any]], + stream_state: StreamState, + records_schema: Mapping[str, Any], + stream_slice: Optional[StreamSlice] = None, + next_page_token: Optional[Mapping[str, Any]] = None, + ) -> Iterable[Record]: + """ + There is an issue with the selector as of 2024-08-30: it does technology-agnostic processing like filtering, transformation and + normalization with an API that is technology-specific (as requests.Response is only for HTTP communication using the requests + library). + + Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could + share the logic of doing transformations on a set of records. 
+ """ filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token) transformed_data = self._transform(filtered_data, stream_state, stream_slice) normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema) @@ -101,6 +119,5 @@ def _transform( ) -> Iterable[Mapping[str, Any]]: for record in records: for transformation in self.transformations: - # record has type Mapping[str, Any], but Record expected - transformation.transform(record, config=self.config, stream_state=stream_state, stream_slice=stream_slice) # type: ignore + transformation.transform(record, config=self.config, stream_state=stream_state, stream_slice=stream_slice) # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected yield record diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py new file mode 100644 index 000000000000..1631e0f7d53a --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -0,0 +1,162 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# + +import os +import uuid +import zlib +from contextlib import closing +from dataclasses import dataclass +from typing import Any, Dict, Iterable, Mapping, Optional, Tuple + +import pandas as pd +import requests +from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor +from numpy import nan + +EMPTY_STR: str = "" +DEFAULT_ENCODING: str = "utf-8" +DOWNLOAD_CHUNK_SIZE: int = 1024 * 1024 * 10 + + +@dataclass +class ResponseToFileExtractor(RecordExtractor): + """ + This class is used when having very big HTTP responses (usually streamed) which would require too much memory so we use disk space as + a tradeoff. + + Eventually, we want to support multiple file type by re-using the file based CDK parsers if possible. However, the lift is too high for + a first iteration so we will only support CSV parsing using pandas as salesforce and sendgrid were doing. + """ + + def _get_response_encoding(self, headers: Dict[str, Any]) -> str: + """ + Get the encoding of the response based on the provided headers. This method is heavily inspired by the requests library + implementation. + + Args: + headers (Dict[str, Any]): The headers of the response. + Returns: + str: The encoding of the response. + """ + + content_type = headers.get("content-type") + + if not content_type: + return DEFAULT_ENCODING + + content_type, params = requests.utils.parse_header_links(content_type) + + if "charset" in params: + return params["charset"].strip("'\"") # type: ignore # we assume headers are returned as str + + return DEFAULT_ENCODING + + def _filter_null_bytes(self, b: bytes) -> bytes: + """ + Filter out null bytes from a bytes object. + + Args: + b (bytes): The input bytes object. + Returns: + bytes: The filtered bytes object with null bytes removed. + + Referenced Issue: + https://github.com/airbytehq/airbyte/issues/8300 + """ + + res = b.replace(b"\x00", b"") + if len(res) < len(b): + pass + # FIXME self.logger.warning("Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)) + return res + + def _save_to_file(self, response: Optional[requests.Response] = None) -> Tuple[str, str]: + """ + Saves the binary data from the given response to a temporary file and returns the filepath and response encoding. 
+ + Args: + response (Optional[requests.Response]): The response object containing the binary data. Defaults to None. + + Returns: + Tuple[str, str]: A tuple containing the filepath of the temporary file and the response encoding. + + Raises: + ValueError: If the temporary file does not exist after saving the binary data. + """ + # set filepath for binary data from response + decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32) + + if response: + tmp_file = str(uuid.uuid4()) + with closing(response) as response, open(tmp_file, "wb") as data_file: + response_encoding = self._get_response_encoding(dict(response.headers or {})) + for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): + try: + data_file.write(decompressor.decompress(chunk)) + except zlib.error: + # we bypass having the context of the error here, + # since it's just a flag-type exception to handle a different scenario. + data_file.write(self._filter_null_bytes(chunk)) + + # check the file exists + if os.path.isfile(tmp_file): + return tmp_file, response_encoding + else: + raise ValueError(f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist.") + + # return default values + return EMPTY_STR, EMPTY_STR + + def _read_with_chunks(self, path: str, file_encoding: str, chunk_size: int = 100) -> Iterable[Mapping[str, Any]]: + """ + Reads data from a file in chunks and yields each row as a dictionary. + + Args: + path (str): The path to the file to be read. + file_encoding (str): The encoding of the file. + chunk_size (int, optional): The size of each chunk to be read. Defaults to 100. + + Yields: + Mapping[str, Any]: A dictionary representing each row of data. + + Raises: + ValueError: If an IO/Error occurs while reading the temporary data. + """ + + try: + with open(path, "r", encoding=file_encoding) as data: + chunks = pd.read_csv(data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object) + for chunk in chunks: + chunk = chunk.replace({nan: None}).to_dict(orient="records") + for row in chunk: + yield row + except pd.errors.EmptyDataError as e: + # FIXME logger.info(f"Empty data received. {e}") + yield from [] + except IOError as ioe: + raise ValueError(f"The IO/Error occured while reading tmp data. Called: {path}", ioe) + finally: + # remove binary tmp file, after data is read + os.remove(path) + + def extract_records(self, response: Optional[requests.Response] = None) -> Iterable[Mapping[str, Any]]: + """ + Extracts records from the given response by: + 1) Saving the result to a tmp file + 2) Reading from saved file by chunks to avoid OOM + + Args: + response (Optional[requests.Response]): The response object containing the data. Defaults to None. + + Yields: + Iterable[Mapping[str, Any]]: An iterable of mappings representing the extracted records. 
+ + Returns: + None + """ + if response: + file_path, encoding = self._save_to_file(response) + yield from self._read_with_chunks(file_path, encoding) + else: + yield from [] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index f9e4856402b0..c5ab599e030d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -613,6 +613,11 @@ class JsonlDecoder(BaseModel): type: Literal['JsonlDecoder'] +class KeysToLower(BaseModel): + type: Literal['KeysToLower'] + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + + class IterableDecoder(BaseModel): type: Literal['IterableDecoder'] @@ -881,6 +886,14 @@ class LegacySessionTokenAuthenticator(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') +class AsyncJobStatusMap(BaseModel): + type: Optional[Literal['AsyncJobStatusMap']] = None + running: str + completed: str + failed: str + timeout: str + + class ValueType(Enum): string = 'string' number = 'number' @@ -1355,7 +1368,7 @@ class Config: extra = Extra.allow type: Literal['DeclarativeStream'] - retriever: Union[CustomRetriever, SimpleRetriever] = Field( + retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( ..., description='Component used to coordinate how records are extracted across stream slices and request pages.', title='Retriever', @@ -1381,7 +1394,7 @@ class Config: title='Schema Loader', ) transformations: Optional[ - List[Union[AddFields, CustomTransformation, RemoveFields]] + List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]] ] = Field( None, description='A list of transformations to be applied to each output record.', @@ -1607,6 +1620,58 @@ class SimpleRetriever(BaseModel): parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') +class AsyncRetriever(BaseModel): + type: Literal['AsyncRetriever'] + record_selector: RecordSelector = Field( + ..., + description='Component that describes how to extract records from a HTTP response.', + ) + status_mapping: AsyncJobStatusMap = Field( + ..., description='Async Job Status to Airbyte CDK Async Job Status mapping.' + ) + status_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field( + None, description='Responsible for fetching the actual status of the async job.' 
+ ) + urls_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field( + None, + description='Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.', + ) + creation_requester: Union[CustomRequester, HttpRequester] = Field( + ..., + description='Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.', + ) + polling_requester: Union[CustomRequester, HttpRequester] = Field( + ..., + description='Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.', + ) + download_requester: Union[CustomRequester, HttpRequester] = Field( + ..., + description='Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.', + ) + partition_router: Optional[ + Union[ + CustomPartitionRouter, + ListPartitionRouter, + SubstreamPartitionRouter, + List[ + Union[ + CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter + ] + ], + ] + ] = Field( + [], + description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', + title='Partition Router', + ) + decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder]] = Field( + None, + description='Component decoding the response so records can be extracted.', + title='Decoder', + ) + parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + + class SubstreamPartitionRouter(BaseModel): type: Literal['SubstreamPartitionRouter'] parent_stream_configs: List[ParentStreamConfig] = Field( @@ -1623,3 +1688,4 @@ class SubstreamPartitionRouter(BaseModel): DeclarativeStream.update_forward_refs() SessionTokenAuthenticator.update_forward_refs() SimpleRetriever.update_forward_refs() +AsyncRetriever.update_forward_refs() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 686eb5d3f9d6..958e8f531013 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -45,6 +45,8 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import AddedFieldDefinition as AddedFieldDefinitionModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import AddFields as AddFieldsModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import ApiKeyAuthenticator as ApiKeyAuthenticatorModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncJobStatusMap as AsyncJobStatusMapModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncRetriever as AsyncRetrieverModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import BasicHttpAuthenticator as BasicHttpAuthenticatorModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import BearerAuthenticator as BearerAuthenticatorModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import CheckStream as CheckStreamModel @@ -81,6 +83,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtAuthenticator as JwtAuthenticatorModel from 
airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtHeaders as JwtHeadersModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import JwtPayload as JwtPayloadModel +from airbyte_cdk.sources.declarative.models.declarative_component_schema import KeysToLower as KeysToLowerModel from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel, ) @@ -135,12 +138,13 @@ from airbyte_cdk.sources.declarative.requesters.request_options import InterpolatedRequestOptionsProvider from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod -from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, SimpleRetrieverTestReadDecorator +from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, SimpleRetriever, SimpleRetrieverTestReadDecorator from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader, InlineSchemaLoader, JsonFileSchemaLoader from airbyte_cdk.sources.declarative.spec import Spec from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer from airbyte_cdk.sources.declarative.transformations import AddFields, RecordTransformation, RemoveFields from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition +from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import KeysToLowerTransformation from airbyte_cdk.sources.message import InMemoryMessageRepository, LogAppenderMessageRepositoryDecorator, MessageRepository from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction from airbyte_cdk.sources.types import Config @@ -150,6 +154,11 @@ ComponentDefinition = Mapping[str, Any] +from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatusMap +from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository + class ModelToComponentFactory: def __init__( @@ -205,6 +214,7 @@ def _init_mappings(self) -> None: InlineSchemaLoaderModel: self.create_inline_schema_loader, JsonDecoderModel: self.create_json_decoder, JsonlDecoderModel: self.create_jsonl_decoder, + KeysToLowerModel: self.create_keys_to_lower_transformation, IterableDecoderModel: self.create_iterable_decoder, JsonFileSchemaLoaderModel: self.create_json_file_schema_loader, JwtAuthenticatorModel: self.create_jwt_authenticator, @@ -229,6 +239,8 @@ def _init_mappings(self) -> None: SubstreamPartitionRouterModel: self.create_substream_partition_router, WaitTimeFromHeaderModel: self.create_wait_time_from_header, WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header, + AsyncRetrieverModel: self.create_async_retriever, + AsyncJobStatusMapModel: self.create_async_job_status_mapping, } # Needed for the case where we need to perform a second parse on the fields of a custom component @@ -288,6 +300,9 @@ def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any ] return AddFields(fields=added_field_definitions, parameters=model.parameters or {}) + def create_keys_to_lower_transformation(self, model: KeysToLowerModel, config: Config, **kwargs: Any) -> KeysToLowerTransformation: + return 
KeysToLowerTransformation(parameters=model.parameters or {}) + @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: if not value_type: @@ -1162,6 +1177,57 @@ def create_simple_retriever( parameters=model.parameters or {}, ) + def create_async_job_status_mapping(self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any) -> AsyncJobStatusMap: + return AsyncJobStatusMap(model=model, parameters={}) + + def create_async_retriever( + self, + model: AsyncRetrieverModel, + config: Config, + *, + name: str, + primary_key: Optional[Union[str, List[str], List[List[str]]]], + stream_slicer: Optional[StreamSlicer], + client_side_incremental_sync: Optional[Dict[str, Any]] = None, + transformations: List[RecordTransformation], + **kwargs, + ) -> AsyncRetriever: + + decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) + record_selector = self._create_component_from_model( + model=model.record_selector, + config=config, + decoder=decoder, + transformations=transformations, + client_side_incremental_sync=client_side_incremental_sync, + ) + stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) + creation_requester = self._create_component_from_model(model=model.creation_requester, decoder=decoder, config=config, name=name) + polling_requester = self._create_component_from_model(model=model.polling_requester, decoder=decoder, config=config, name=name) + download_requester = self._create_component_from_model(model=model.download_requester, decoder=decoder, config=config, name=name) + status_extractor = self._create_component_from_model(model=model.status_extractor, decoder=decoder, config=config, name=name) + urls_extractor = self._create_component_from_model(model=model.urls_extractor, decoder=decoder, config=config, name=name) + status_mapping = self._create_component_from_model(model=model.status_mapping, config=config, name=name) + job_repository: AsyncJobRepository = AsyncHttpJobRepository( + creation_requester=creation_requester, + polling_requester=polling_requester, + download_requester=download_requester, + status_extractor=status_extractor, + status_mapping=status_mapping.parse_input(), + urls_extractor=urls_extractor, + ) + job_orchestrator_factory = lambda stream_slices: AsyncJobOrchestrator(job_repository, stream_slices) + + return AsyncRetriever( + name=name, + primary_key=primary_key, + job_orchestrator_factory=job_orchestrator_factory, + record_selector=record_selector, + stream_slicer=stream_slicer, + config=config, + parameters=model.parameters or {}, + ) + @staticmethod def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: return Spec( diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py new file mode 100644 index 000000000000..98794889a3cb --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -0,0 +1,163 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ + +import uuid +from dataclasses import dataclass +from typing import Any, Dict, Iterable, Mapping, Optional + +import requests +from airbyte_cdk.models import FailureType +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus +from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor, RecordExtractor +from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import ResponseToFileExtractor +from airbyte_cdk.sources.declarative.requesters.requester import Requester +from airbyte_cdk.sources.types import StreamSlice +from airbyte_cdk.utils import AirbyteTracedException +from requests import Response + + +@dataclass +class AsyncHttpJobRepository(AsyncJobRepository): + creation_requester: Requester + polling_requester: Requester + download_requester: Requester + status_extractor: DpathExtractor + status_mapping: Mapping[str, AsyncJobStatus] + urls_extractor: DpathExtractor + + record_extractor: RecordExtractor = ResponseToFileExtractor() + + def __post_init__(self) -> None: + self._create_job_response_by_id: Dict[str, Response] = {} + self._polling_job_response_by_id: Dict[str, Response] = {} + + def _get_validated_polling_response(self, stream_slice: StreamSlice) -> requests.Response: + """ + Validates and retrieves the polling response for a given stream slice. + + Args: + stream_slice (StreamSlice): The stream slice to send the polling request for. + + Returns: + requests.Response: The validated polling response. + + Raises: + AirbyteTracedException: If the polling request returns an empty response. + """ + + polling_response: Optional[requests.Response] = self.polling_requester.send_request(stream_slice=stream_slice) + if polling_response is None: + raise AirbyteTracedException( + internal_message="Polling Requester received an empty Response.", + failure_type=FailureType.system_error, + ) + return polling_response + + def _get_validated_job_status(self, response: requests.Response) -> AsyncJobStatus: + """ + Validates the job status extracted from the API response. + + Args: + response (requests.Response): The API response. + + Returns: + AsyncJobStatus: The validated job status. + + Raises: + ValueError: If the API status is unknown. + """ + + api_status = next(iter(self.status_extractor.extract_records(response)), None) + job_status = self.status_mapping.get(str(api_status), None) + if job_status is None: + raise ValueError( + f"API status `{api_status}` is unknown. Contact the connector developer to make sure this status is supported." + ) + + return job_status + + def _start_job_and_validate_response(self, stream_slice: StreamSlice) -> requests.Response: + """ + Starts a job and validates the response. + + Args: + stream_slice (StreamSlice): The stream slice to be used for the job. + + Returns: + requests.Response: The response from the job creation requester. + + Raises: + AirbyteTracedException: If no response is received from the creation requester.
+ """ + + response: Optional[requests.Response] = self.creation_requester.send_request(stream_slice=stream_slice) + if not response: + raise AirbyteTracedException( + internal_message="Always expect a response or an exception from creation_requester", + failure_type=FailureType.system_error, + ) + + return response + + def start(self, stream_slice: StreamSlice) -> AsyncJob: + """ + Starts a job for the given stream slice. + + Args: + stream_slice (StreamSlice): The stream slice to start the job for. + + Returns: + AsyncJob: The asynchronous job object representing the started job. + """ + + response: requests.Response = self._start_job_and_validate_response(stream_slice) + job_id: str = str(uuid.uuid4()) + self._create_job_response_by_id[job_id] = response + + return AsyncJob(api_job_id=job_id, job_parameters=stream_slice) + + def update_jobs_status(self, jobs: Iterable[AsyncJob]) -> None: + """ + Updates the status of multiple jobs. + + Args: + jobs (Iterable[AsyncJob]): An iterable of AsyncJob objects representing the jobs to update. + + Returns: + None + """ + for job in jobs: + stream_slice = StreamSlice( + partition={"create_job_response": self._create_job_response_by_id[job.api_job_id()]}, + cursor_slice={}, + ) + polling_response: requests.Response = self._get_validated_polling_response(stream_slice) + job_status: AsyncJobStatus = self._get_validated_job_status(polling_response) + job.update_status(job_status) + if job_status == AsyncJobStatus.COMPLETED: + self._polling_job_response_by_id[job.api_job_id()] = polling_response + + def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: + """ + Fetches records from the given job. + + Args: + job (AsyncJob): The job to fetch records from. + + Yields: + Iterable[Mapping[str, Any]]: A generator that yields records as dictionaries. 
+ + """ + + for url in self.urls_extractor.extract_records(self._polling_job_response_by_id[job.api_job_id()]): + stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={}) + # FIXME salesforce will require pagination here + response = self.download_requester.send_request(stream_slice=stream_slice) + if response: + yield from self.record_extractor.extract_records(response) + + yield from [] + + # FIXME clean self._create_job_response_by_id and self._polling_job_response_by_id diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py index fcbe40d95414..9ec5017fb38c 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/__init__.py @@ -4,5 +4,6 @@ from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever, SimpleRetrieverTestReadDecorator +from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever -__all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator"] +__all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"] diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py new file mode 100644 index 000000000000..018bda819bd7 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -0,0 +1,140 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + + +from dataclasses import InitVar, dataclass, field +from typing import Any, Callable, Iterable, List, Mapping, Optional, Union + +from airbyte_cdk.models import FailureType +from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator, AsyncPartition +from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector +from airbyte_cdk.sources.declarative.interpolation import InterpolatedString +from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter +from airbyte_cdk.sources.declarative.retrievers import Retriever +from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer +from airbyte_cdk.sources.streams.core import StreamData +from airbyte_cdk.sources.types import Config, StreamSlice, StreamState +from airbyte_cdk.utils.traced_exception import AirbyteTracedException + + +@dataclass +class AsyncRetriever(Retriever): + config: Config + parameters: InitVar[Mapping[str, Any]] + job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator] + record_selector: RecordSelector + name: str # type: ignore + _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="") + primary_key: Optional[Union[str, List[str], List[List[str]]]] # type: ignore + _primary_key: str = field(init=False, repr=False, default="") + stream_slicer: StreamSlicer = field(default_factory=lambda: SinglePartitionRouter(parameters={})) + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + self._job_orchestrator_factory = self.job_orchestrator_factory + self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None + self._parameters = parameters + self._name = InterpolatedString(self._name, parameters=parameters) if isinstance(self._name, str) else self._name + 
+ @property + def name(self) -> str: + """ + :return: Stream name + """ + return str(self._name.eval(self.config)) if isinstance(self._name, InterpolatedString) else self._name + + @name.setter + def name(self, value: str) -> None: + if not isinstance(value, property): + self._name = value + + @property + def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: + """The stream's primary key""" + return self._primary_key + + @primary_key.setter + def primary_key(self, value: str) -> None: + if not isinstance(value, property): + self._primary_key = value + + @property + def state(self) -> StreamState: + """ + As a first iteration for sendgrid, there is no state to be managed + """ + return {} + + @state.setter + def state(self, value: StreamState) -> None: + """ + As a first iteration for sendgrid, there is no state to be managed + """ + pass + + @property + def _job_orchestrator(self) -> AsyncJobOrchestrator: + if not self.__job_orchestrator: + raise AirbyteTracedException( + message="Invalid state within AsyncRetriever. Please contact Airbyte Support", + internal_message="The AsyncJobOrchestrator is expected to be accessed only after `stream_slices`", + failure_type=FailureType.system_error, + ) + + return self.__job_orchestrator + + def _get_stream_state(self) -> StreamState: + """ + Gets the current state of the stream. + + Returns: + StreamState: Mapping[str, Any] + """ + + return self.state + + def _validate_and_get_stream_slice_partition(self, stream_slice: Optional[StreamSlice] = None) -> AsyncPartition: + """ + Validates the stream_slice argument and returns the partition from it. + + Args: + stream_slice (Optional[StreamSlice]): The stream slice to validate and extract the partition from. + + Returns: + AsyncPartition: The partition extracted from the stream_slice. + + Raises: + AirbyteTracedException: If the stream_slice is not an instance of StreamSlice or if the partition is not present in the stream_slice. + + """ + if not isinstance(stream_slice, StreamSlice) or "partition" not in stream_slice.partition: + raise AirbyteTracedException( + message="Invalid arguments to AsyncRetriever.read_records: stream_slice is not optional.
Please contact Airbyte Support", + failure_type=FailureType.system_error, + ) + return stream_slice["partition"] + + def stream_slices(self) -> Iterable[Optional[StreamSlice]]: + slices = self.stream_slicer.stream_slices() + self.__job_orchestrator = self._job_orchestrator_factory(slices) + + for completed_partition in self._job_orchestrator.create_and_get_completed_partitions(): + yield StreamSlice( + partition=dict(completed_partition.stream_slice.partition) | {"partition": completed_partition}, + cursor_slice=completed_partition.stream_slice.cursor_slice, + ) + + def read_records( + self, + records_schema: Mapping[str, Any], + stream_slice: Optional[StreamSlice] = None, + ) -> Iterable[StreamData]: + + stream_state: StreamState = self._get_stream_state() + partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice) + records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition) + + yield from self.record_selector.filter_and_transform( + all_data=records, + stream_state=stream_state, + records_schema=records_schema, + stream_slice=stream_slice, + ) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py index 67290d2a5d95..2a69b78218fd 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -3,12 +3,12 @@ # from dataclasses import InitVar, dataclass, field -from typing import Any, List, Mapping, Optional, Type, Union +from typing import Any, Dict, List, Mapping, Optional, Type, Union import dpath from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.types import Config, FieldPointer, Record, StreamSlice, StreamState +from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState @dataclass(frozen=True) @@ -111,11 +111,11 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Record, + record: Dict[str, Any], config: Optional[Config] = None, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, - ) -> Record: + ) -> None: if config is None: config = {} kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice} @@ -124,7 +124,5 @@ def transform( value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs) dpath.new(record, parsed_field.path, value) - return record - def __eq__(self, other: Any) -> bool: return bool(self.__dict__ == other.__dict__) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py new file mode 100644 index 000000000000..e29bf10f10c6 --- /dev/null +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -0,0 +1,30 @@ +# +# Copyright (c) 2023 Airbyte, Inc., all rights reserved. 
+# + +from dataclasses import InitVar, dataclass, field +from typing import Any, Dict, List, Mapping, Optional, Type, Union + +import dpath +from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +from airbyte_cdk.sources.declarative.transformations import RecordTransformation +from airbyte_cdk.sources.types import Config, FieldPointer, Record, StreamSlice, StreamState + + +@dataclass +class KeysToLowerTransformation(RecordTransformation): + + parameters: InitVar[Mapping[str, Any]] + + def __post_init__(self, parameters: Mapping[str, Any]) -> None: + pass # parameters does not need to be considered here + + def transform( + self, + record: Dict[str, Any], + config: Optional[Config] = None, + stream_state: Optional[StreamState] = None, + stream_slice: Optional[StreamSlice] = None, + ) -> None: + for key in set(record.keys()): + record[key.lower()] = record.pop(key) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py index 1d4edfc39367..16c62042ef2f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -3,7 +3,7 @@ # from dataclasses import InitVar, dataclass -from typing import Any, List, Mapping, Optional +from typing import Any, Dict, List, Mapping, Optional import dpath import dpath.exceptions @@ -48,11 +48,11 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def transform( self, - record: Mapping[str, Any], + record: Dict[str, Any], config: Optional[Config] = None, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, - ) -> Mapping[str, Any]: + ) -> None: """ :param record: The record to be transformed :return: the input record with the requested fields removed diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py index bd66f5fae119..f5b22642964b 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -4,9 +4,9 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any, Mapping, Optional +from typing import Any, Dict, Optional -from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState +from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass @@ -18,13 +18,13 @@ class RecordTransformation: @abstractmethod def transform( self, - record: Record, + record: Dict[str, Any], config: Optional[Config] = None, stream_state: Optional[StreamState] = None, stream_slice: Optional[StreamSlice] = None, - ) -> Mapping[str, Any]: + ) -> None: """ - Transform a record by adding, deleting, or mutating fields. + Transform a record by adding, deleting, or mutating fields directly from the record reference passed in argument. 
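+        Implementations are expected to mutate the record in place and return `None`, so callers must not rely on a new mapping being returned.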
:param record: The input record to be transformed :param config: The user-provided configuration as specified by the source's spec diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/__init__.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py new file mode 100644 index 000000000000..bbf80c59c294 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py @@ -0,0 +1,115 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. + + +import logging +from typing import Any, Iterable, List, Mapping, Optional, Set, Tuple +from unittest import TestCase, mock + +from airbyte_cdk import AbstractSource, DeclarativeStream, SinglePartitionRouter, Stream, StreamSlice +from airbyte_cdk.models import ConnectorSpecification +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob +from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus +from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor +from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector +from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever +from airbyte_cdk.sources.declarative.schema import InlineSchemaLoader +from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer +from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +from airbyte_cdk.test.catalog_builder import CatalogBuilder, ConfiguredAirbyteStreamBuilder +from airbyte_cdk.test.entrypoint_wrapper import read + +_A_STREAM_NAME = "a_stream_name" +_EXTRACTOR_NOT_USED: RecordExtractor = None # type: ignore # the extractor should not be used. 
If it is the case, there is an issue that needs fixing + + +class MockAsyncJobRepository(AsyncJobRepository): + + def start(self, stream_slice: StreamSlice) -> AsyncJob: + return AsyncJob("a_job_id", StreamSlice(partition={}, cursor_slice={})) + + def update_jobs_status(self, jobs: Set[AsyncJob]) -> None: + for job in jobs: + job.update_status(AsyncJobStatus.COMPLETED) + + def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: + yield from [{"record_field": 10}] + + +class MockSource(AbstractSource): + + def __init__(self, stream_slicer: Optional[StreamSlicer] = None) -> None: + self._stream_slicer = SinglePartitionRouter({}) if stream_slicer is None else stream_slicer + + def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]: + return True, None + + def spec(self, logger: logging.Logger) -> ConnectorSpecification: + return ConnectorSpecification(connectionSpecification={}) + + def streams(self, config: Mapping[str, Any]) -> List[Stream]: + job_orchestrator_factory_fn = lambda stream_slices: AsyncJobOrchestrator( + MockAsyncJobRepository(), stream_slices, + ) + + noop_record_selector = RecordSelector( + extractor=_EXTRACTOR_NOT_USED, + config={}, + parameters={}, + schema_normalization=TypeTransformer(TransformConfig.NoTransform), + record_filter=None, + transformations=[] + ) + return [ + DeclarativeStream( + retriever=AsyncRetriever( + name="test_async_retriever", + primary_key="id", + config={}, + parameters={}, + record_selector=noop_record_selector, + stream_slicer=self._stream_slicer, + job_orchestrator_factory=job_orchestrator_factory_fn, + ), + config={}, + parameters={}, + name=_A_STREAM_NAME, + primary_key=["id"], + schema_loader=InlineSchemaLoader({}, {}), + # the interface mentions that this is Optional, + # but I get `'NoneType' object has no attribute 'eval'` by passing None + stream_cursor_field="", + ) + ] + + +class JobDeclarativeStreamTest(TestCase): + _CONFIG: Mapping[str, Any] = {} + + def setUp(self) -> None: + self._stream_slicer = mock.Mock(wraps=SinglePartitionRouter({})) + self._source = MockSource(self._stream_slicer) + self._source.streams({}) + + def test_when_read_then_return_records_from_repository(self) -> None: + output = read( + self._source, + self._CONFIG, + CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() + ) + + assert len(output.records) == 1 + + def test_when_read_then_call_stream_slices_only_once(self) -> None: + """ + As generating stream slices is very expensive, we want to ensure that during a read, it is only called once. + """ + output = read( + self._source, + self._CONFIG, + CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() + ) + + assert not output.errors + assert self._stream_slicer.stream_slices.call_count == 1 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py new file mode 100644 index 000000000000..c83f50ceeb17 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py @@ -0,0 +1,146 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
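+# +# Note: the tests below drive AsyncJobOrchestrator with a mocked AsyncJobRepository and scripted status transitions (running, completed, failed, timed out) to check how partitions are created and polled.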
+ +import logging +from typing import Callable, List, Mapping, Set +from unittest import TestCase, mock +from unittest.mock import MagicMock, Mock, call + +import pytest +from airbyte_cdk import AirbyteTracedException, StreamSlice +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob, AsyncJobStatus +from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator, AsyncPartition +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository + +_ANY_STREAM_SLICE = Mock() +_A_STREAM_SLICE = Mock() +_ANOTHER_STREAM_SLICE = Mock() +_ANY_RECORD = {"a record field": "a record value"} + + +def _create_job(status: AsyncJobStatus = AsyncJobStatus.FAILED) -> AsyncJob: + job = Mock(spec=AsyncJob) + job.status.return_value = status + return job + + +class AsyncPartitionTest(TestCase): + def test_given_one_failed_job_when_status_then_return_failed(self) -> None: + partition = AsyncPartition([_create_job(status) for status in AsyncJobStatus], _ANY_STREAM_SLICE) + assert partition.status == AsyncJobStatus.FAILED + + def test_given_all_status_except_failed_when_status_then_return_timed_out(self) -> None: + statuses = [status for status in AsyncJobStatus if status != AsyncJobStatus.FAILED] + partition = AsyncPartition([_create_job(status) for status in statuses], _ANY_STREAM_SLICE) + assert partition.status == AsyncJobStatus.TIMED_OUT + + def test_given_running_and_completed_jobs_when_status_then_return_running(self) -> None: + partition = AsyncPartition([_create_job(AsyncJobStatus.RUNNING), _create_job(AsyncJobStatus.COMPLETED)], _ANY_STREAM_SLICE) + assert partition.status == AsyncJobStatus.RUNNING + + def test_given_only_completed_jobs_when_status_then_return_running(self) -> None: + partition = AsyncPartition([_create_job(AsyncJobStatus.COMPLETED) for _ in range(10)], _ANY_STREAM_SLICE) + assert partition.status == AsyncJobStatus.COMPLETED + + +def _status_update_per_jobs(status_update_per_jobs: Mapping[AsyncJob, List[AsyncJobStatus]]) -> Callable[[set[AsyncJob]], None]: + status_index_by_job = {job: 0 for job in status_update_per_jobs.keys()} + + def _update_status(jobs: Set[AsyncJob]) -> None: + for job in jobs: + status_index = status_index_by_job[job] + job.update_status(status_update_per_jobs[job][status_index]) + status_index_by_job[job] += 1 + + return _update_status + + + + +sleep_mock_target = "airbyte_cdk.sources.declarative.async_job.job_orchestrator.time.sleep" + +class AsyncJobOrchestratorTest(TestCase): + def setUp(self) -> None: + self._job_repository = Mock(spec=AsyncJobRepository) + self._logger = Mock(spec=logging.Logger) + + self._job_for_a_slice = mock.Mock(wraps=AsyncJob("an api job id", _A_STREAM_SLICE)) + self._job_for_another_slice = mock.Mock(wraps=AsyncJob("another api job id", _ANOTHER_STREAM_SLICE)) + + @mock.patch(sleep_mock_target) + def test_when_create_and_get_completed_partitions_then_create_job_and_update_status_until_completed(self, mock_sleep: MagicMock) -> None: + self._job_repository.start.return_value = self._job_for_a_slice + status_updates = [AsyncJobStatus.RUNNING, AsyncJobStatus.RUNNING, AsyncJobStatus.COMPLETED] + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( + { + self._job_for_a_slice: status_updates + } + ) + orchestrator = self._orchestrator([_A_STREAM_SLICE]) + + partitions = list(orchestrator.create_and_get_completed_partitions()) + + assert len(partitions) == 1 + assert partitions[0].status == AsyncJobStatus.COMPLETED + assert 
self._job_for_a_slice.update_status.mock_calls == [call(status) for status in status_updates] + + @mock.patch(sleep_mock_target) + def test_given_one_job_still_running_when_create_and_get_completed_partitions_then_only_update_running_job_status(self, mock_sleep: MagicMock) -> None: + self._job_repository.start.side_effect = [self._job_for_a_slice, self._job_for_another_slice] + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( + { + self._job_for_a_slice: [AsyncJobStatus.COMPLETED], + self._job_for_another_slice: [AsyncJobStatus.RUNNING, AsyncJobStatus.COMPLETED], + } + ) + orchestrator = self._orchestrator([_A_STREAM_SLICE, _ANOTHER_STREAM_SLICE]) + + list(orchestrator.create_and_get_completed_partitions()) + + assert self._job_repository.update_jobs_status.mock_calls == [ + call({self._job_for_a_slice, self._job_for_another_slice}), + call({self._job_for_another_slice}), + ] + + @mock.patch(sleep_mock_target) + def test_given_timeout_when_create_and_get_completed_partitions_then_raise_exception(self, mock_sleep: MagicMock) -> None: + self._job_repository.start.return_value = self._job_for_a_slice + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( + { + self._job_for_a_slice: [AsyncJobStatus.TIMED_OUT] + } + ) + orchestrator = self._orchestrator([_A_STREAM_SLICE]) + + with pytest.raises(AirbyteTracedException): + list(orchestrator.create_and_get_completed_partitions()) + assert self._job_repository.start.call_args_list == [call(_A_STREAM_SLICE)] * 4 + + @mock.patch(sleep_mock_target) + def test_given_failure_when_create_and_get_completed_partitions_then_raise_exception(self, mock_sleep: MagicMock) -> None: + self._job_repository.start.return_value = self._job_for_a_slice + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( + { + self._job_for_a_slice: [AsyncJobStatus.FAILED] + } + ) + orchestrator = self._orchestrator([_A_STREAM_SLICE]) + + with pytest.raises(AirbyteTracedException): + list(orchestrator.create_and_get_completed_partitions()) + assert self._job_repository.start.call_args_list == [call(_A_STREAM_SLICE)] * 4 + + + def test_when_fetch_records_then_yield_records_from_each_job(self) -> None: + self._job_repository.fetch_records.return_value = [_ANY_RECORD] + orchestrator = self._orchestrator([_A_STREAM_SLICE]) + first_job = _create_job() + second_job = _create_job() + partition = AsyncPartition([first_job, second_job], _A_STREAM_SLICE) + + records = list(orchestrator.fetch_records(partition)) + + assert len(records) == 2 + assert self._job_repository.fetch_records.mock_calls == [call(first_job), call(second_job)] + + def _orchestrator(self, slices: List[StreamSlice]) -> AsyncJobOrchestrator: + return AsyncJobOrchestrator(self._job_repository, slices) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_job_repository.py b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_job_repository.py new file mode 100644 index 000000000000..17966e963eac --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/requesters/test_http_job_repository.py @@ -0,0 +1,180 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
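+# +# Note: these tests exercise AsyncHttpJobRepository over a mocked HTTP layer (create the job, poll its status, then download the result URLs); the SendGrid-style export endpoints used below are illustrative test fixtures.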
+ + +import json +from unittest import TestCase +from unittest.mock import Mock + +import pytest +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus +from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder +from airbyte_cdk.sources.declarative.extractors import DpathExtractor +from airbyte_cdk.sources.declarative.requesters.error_handlers import DefaultErrorHandler +from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository +from airbyte_cdk.sources.declarative.requesters.http_requester import HttpRequester +from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod +from airbyte_cdk.sources.types import StreamSlice +from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse + +_ANY_CONFIG = {} +_ANY_SLICE = StreamSlice(partition={}, cursor_slice={}) +_URL_BASE = "https://api.sendgrid.com/v3/" +_EXPORT_PATH = "marketing/contacts/exports" +_EXPORT_URL = f"{_URL_BASE}{_EXPORT_PATH}" +_A_JOB_ID = "a-job-id" +_ANOTHER_JOB_ID = "another-job-id" +_JOB_FIRST_URL = "https://job.result.api.com/1" +_JOB_SECOND_URL = "https://job.result.api.com/2" +_A_CSV_WITH_ONE_RECORD = """id,value +a_record_id,a_value +""" + + +class HttpJobRepositoryTest(TestCase): + def setUp(self) -> None: + message_repository = Mock() + error_handler = DefaultErrorHandler(config=_ANY_CONFIG, parameters={}) + + self._create_job_requester = HttpRequester( + name="stream : create_job", + url_base=_URL_BASE, + path=_EXPORT_PATH, + error_handler=error_handler, + http_method=HttpMethod.POST, + config=_ANY_CONFIG, + disable_retries=False, + parameters={}, + message_repository=message_repository, + use_cache=False, + stream_response=False, + ) + + self._polling_job_requester = HttpRequester( + name="stream : polling", + url_base=_URL_BASE, + path=_EXPORT_PATH + "/{{stream_slice['create_job_response'].json()['id']}}", + error_handler=error_handler, + http_method=HttpMethod.GET, + config=_ANY_CONFIG, + disable_retries=False, + parameters={}, + message_repository=message_repository, + use_cache=False, + stream_response=False, + ) + + self._download_job_requester = HttpRequester( + name="stream : fetch_result", + url_base="", + path="{{stream_slice['url']}}", + error_handler=error_handler, + http_method=HttpMethod.GET, + config=_ANY_CONFIG, + disable_retries=False, + parameters={}, + message_repository=message_repository, + use_cache=False, + stream_response=True, + ) + + self._repository = AsyncHttpJobRepository( + creation_requester=self._create_job_requester, + polling_requester=self._polling_job_requester, + download_requester=self._download_job_requester, + status_extractor=DpathExtractor(decoder=JsonDecoder(parameters={}), field_path=["status"], config={}, parameters={} or {}), + status_mapping={ + "ready": AsyncJobStatus.COMPLETED, + "failure": AsyncJobStatus.FAILED, + "pending": AsyncJobStatus.RUNNING, + }, + urls_extractor=DpathExtractor(decoder=JsonDecoder(parameters={}), field_path=["urls"], config={}, parameters={} or {}), + ) + + self._http_mocker = HttpMocker() + self._http_mocker.__enter__() + + def tearDown(self) -> None: + self._http_mocker.__exit__(None, None, None) + + def test_given_different_statuses_when_update_jobs_status_then_update_status_properly(self) -> None: + self._mock_create_response(_A_JOB_ID) + self._http_mocker.get( + HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), + [ + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "pending"})), + HttpResponse(body=json.dumps({"id": 
_A_JOB_ID, "status": "failure"})), + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "ready"})), + ] + ) + job = self._repository.start(_ANY_SLICE) + + self._repository.update_jobs_status([job]) + assert job.status() == AsyncJobStatus.RUNNING + self._repository.update_jobs_status([job]) + assert job.status() == AsyncJobStatus.FAILED + self._repository.update_jobs_status([job]) + assert job.status() == AsyncJobStatus.COMPLETED + + def test_given_unknown_status_when_update_jobs_status_then_raise_error(self) -> None: + self._mock_create_response(_A_JOB_ID) + self._http_mocker.get( + HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "invalid_status"})), + ) + job = self._repository.start(_ANY_SLICE) + + with pytest.raises(ValueError): + self._repository.update_jobs_status([job]) + + def test_given_multiple_jobs_when_update_jobs_status_then_all_the_jobs_are_updated(self) -> None: + self._mock_create_response(_A_JOB_ID) + self._http_mocker.get( + HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "ready"})), + ) + self._mock_create_response(_ANOTHER_JOB_ID) + self._http_mocker.get( + HttpRequest(url=f"{_EXPORT_URL}/{_ANOTHER_JOB_ID}"), + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "ready"})), + ) + a_job = self._repository.start(_ANY_SLICE) + another_job = self._repository.start(_ANY_SLICE) + + self._repository.update_jobs_status([a_job, another_job]) + + assert a_job.status() == AsyncJobStatus.COMPLETED + assert another_job.status() == AsyncJobStatus.COMPLETED + + def test_given_multiple_urls_when_fetch_records_then_fetch_from_multiple_urls(self) -> None: + self._mock_create_response(_A_JOB_ID) + self._http_mocker.get( + HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), + HttpResponse(body=json.dumps({ + "id": _A_JOB_ID, + "status": "ready", + "urls": [ + _JOB_FIRST_URL, + _JOB_SECOND_URL, + ] + })) + ) + self._http_mocker.get( + HttpRequest(url=_JOB_FIRST_URL), + HttpResponse(body=_A_CSV_WITH_ONE_RECORD), + ) + self._http_mocker.get( + HttpRequest(url=_JOB_SECOND_URL), + HttpResponse(body=_A_CSV_WITH_ONE_RECORD), + ) + + job = self._repository.start(_ANY_SLICE) + self._repository.update_jobs_status([job]) + records = list(self._repository.fetch_records(job)) + + assert len(records) == 2 + + def _mock_create_response(self, job_id: str) -> None: + self._http_mocker.post( + HttpRequest(url=_EXPORT_URL), + HttpResponse(body=json.dumps({"id": job_id})), + ) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py index 83d5f19e8585..9b46cf49b99b 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_add_fields.py @@ -132,4 +132,5 @@ def test_add_fields( expected: Mapping[str, Any], ): inputs = [AddedFieldDefinition(path=v[0], value=v[1], value_type=field_type, parameters={}) for v in field] - assert AddFields(fields=inputs, parameters={"alas": "i live"}).transform(input_record, **kwargs) == expected + AddFields(fields=inputs, parameters={"alas": "i live"}).transform(input_record, **kwargs) + assert input_record == expected diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py 
b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py new file mode 100644 index 000000000000..591ad06f97c7 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py @@ -0,0 +1,13 @@ +# +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. +# + +from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import KeysToLowerTransformation + +_ANY_VALUE = -1 + + +def test_transform() -> None: + record = {"wIth_CapITal": _ANY_VALUE, "anOThEr_witH_Caps": _ANY_VALUE} + KeysToLowerTransformation(parameters={}).transform(record) + assert record == {"with_capital": _ANY_VALUE, "another_with_caps": _ANY_VALUE} diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py index 0b9d6da5b56d..89b17e8d0f75 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_remove_fields.py @@ -85,4 +85,5 @@ ) def test_remove_fields(input_record: Mapping[str, Any], field_pointers: List[FieldPointer], condition: str, expected: Mapping[str, Any]): transformation = RemoveFields(field_pointers=field_pointers, condition=condition, parameters={}) - assert transformation.transform(input_record) == expected + transformation.transform(input_record) + assert input_record == expected From 61b0031c66375647f727010c26c412575cca28bb Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 14:21:20 -0400 Subject: [PATCH 02/18] Remove FIXME that can be addressed today --- .../declarative/extractors/response_to_file_extractor.py | 8 +++++--- .../sources/declarative/requesters/http_job_repository.py | 6 +++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index 1631e0f7d53a..8e8dcf407fb9 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -1,7 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # - +import logging import os import uuid import zlib @@ -19,7 +19,6 @@ DOWNLOAD_CHUNK_SIZE: int = 1024 * 1024 * 10 -@dataclass class ResponseToFileExtractor(RecordExtractor): """ This class is used when having very big HTTP responses (usually streamed) which would require too much memory so we use disk space as @@ -29,6 +28,9 @@ class ResponseToFileExtractor(RecordExtractor): a first iteration so we will only support CSV parsing using pandas as salesforce and sendgrid were doing. """ + def __init__(self) -> None: + self.logger = logging.getLogger("airbyte") + def _get_response_encoding(self, headers: Dict[str, Any]) -> str: """ Get the encoding of the response based on the provided headers. This method is heavily inspired by the requests library @@ -132,7 +134,7 @@ def _read_with_chunks(self, path: str, file_encoding: str, chunk_size: int = 100 for row in chunk: yield row except pd.errors.EmptyDataError as e: - # FIXME logger.info(f"Empty data received. {e}") + self.logger.info(f"Empty data received.
{e}") yield from [] except IOError as ioe: raise ValueError(f"The IO/Error occured while reading tmp data. Called: {path}", ioe) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index 98794889a3cb..5b26c11773a3 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -160,4 +160,8 @@ def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: yield from [] - # FIXME clean self._create_job_response_by_id and self._polling_job_response_by_id + self._clean_up_job(job.api_job_id()) + + def _clean_up_job(self, job_id: str) -> None: + del self._create_job_response_by_id[job_id] + del self._polling_job_response_by_id[job_id] From fbb0f805460c3e225a1960e6c8639bf8f90cdfe1 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 14:25:18 -0400 Subject: [PATCH 03/18] Fix TODOs --- .../sources/declarative/async_job/job_orchestrator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py index 098800bcab57..ff4c37c4f216 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py @@ -68,7 +68,8 @@ def status(self) -> AsyncJobStatus: else: return AsyncJobStatus.RUNNING - # TODO def __repr__(self) -> str: for slice printing + def __repr__(self) -> str: + return f"AsyncPartition(stream_slice={self._stream_slice}, attempt_per_job={self._attempts_per_job})" class AsyncJobOrchestrator: From fdb1d7b4aadc08709316a48ffe85bc960f0439bd Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 14:48:28 -0400 Subject: [PATCH 04/18] Update status mapping --- .../sources/declarative/async_job/status.py | 38 ------------------- .../declarative_component_schema.yaml | 16 ++++++-- .../models/declarative_component_schema.py | 8 ++-- .../parsers/model_to_component_factory.py | 33 +++++++++++++--- 4 files changed, 43 insertions(+), 52 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py index 175dc6b78a66..ece158d11041 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py @@ -1,11 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. -from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any, List, Mapping - -from airbyte_cdk.sources.declarative.models.declarative_component_schema import AsyncJobStatusMap as AsyncJobStatusMapModel class AsyncJobStatus(Enum): @@ -13,37 +9,3 @@ class AsyncJobStatus(Enum): COMPLETED = "COMPLETED" FAILED = "FAILED" TIMED_OUT = "TIMED_OUT" - - -@dataclass -class AsyncJobStatusMap: - model: AsyncJobStatusMapModel - parameters: InitVar[Mapping[str, Any]] - - def parse_input(self) -> Mapping[str, AsyncJobStatus]: - """ - Parses the input and returns a mapping of status values. - - Returns: - A mapping of status values where the keys are the input values and the values are the corresponding AsyncJobStatus. 
- - Example: - { - 'running': AsyncJobStatus.RUNNING, - 'completed': AsyncJobStatus.COMPLETED, - 'failed': AsyncJobStatus.FAILED, - 'timeout': AsyncJobStatus.TIMED_OUT, - } - """ - status_mapping = {} - for key, value in self.model.dict().items(): - match key: - case "running": - status_mapping[value] = AsyncJobStatus.RUNNING - case "completed": - status_mapping[value] = AsyncJobStatus.COMPLETED - case "failed": - status_mapping[value] = AsyncJobStatus.FAILED - case "timeout": - status_mapping[value] = AsyncJobStatus.TIMED_OUT - return status_mapping diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index eb7fd3213e66..90fcf9279434 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -2369,13 +2369,21 @@ definitions: type: string enum: [AsyncJobStatusMap] running: - type: string + type: array + items: + type: string completed: - type: string + type: array + items: + type: string failed: - type: string + type: array + items: + type: string timeout: - type: string + type: array + items: + type: string AsyncRetriever: description: Retrieves records by Asynchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router. type: object diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index c5ab599e030d..9160404ec960 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -888,10 +888,10 @@ class LegacySessionTokenAuthenticator(BaseModel): class AsyncJobStatusMap(BaseModel): type: Optional[Literal['AsyncJobStatusMap']] = None - running: str - completed: str - failed: str - timeout: str + running: List[str] + completed: List[str] + failed: List[str] + timeout: List[str] class ValueType(Enum): diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 958e8f531013..d047506b408d 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -156,7 +156,7 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository -from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatusMap +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository @@ -240,7 +240,6 @@ def _init_mappings(self) -> None: WaitTimeFromHeaderModel: self.create_wait_time_from_header, WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header, AsyncRetrieverModel: self.create_async_retriever, - AsyncJobStatusMapModel: self.create_async_job_status_mapping, } # Needed for the case where we need to perform a second parse on the fields of a custom component 
@@ -1177,8 +1176,31 @@ def create_simple_retriever( parameters=model.parameters or {}, ) - def create_async_job_status_mapping(self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any) -> AsyncJobStatusMap: - return AsyncJobStatusMap(model=model, parameters={}) + def _create_async_job_status_mapping( + self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any + ) -> Mapping[str, AsyncJobStatus]: + api_status_to_cdk_status = {} + for cdk_status, api_statuses in model.dict().items(): + for status in api_statuses: + if status in api_status_to_cdk_status: + raise ValueError( + f"API status {status} is already set for CDK status {cdk_status}. Please ensure API statuses are only provided once" + ) + api_status_to_cdk_status[status] = self._get_async_job_status(cdk_status) + return api_status_to_cdk_status + + def _get_async_job_status(self, status: str) -> AsyncJobStatus: + match status: + case "running": + return AsyncJobStatus.RUNNING + case "completed": + return AsyncJobStatus.COMPLETED + case "failed": + return AsyncJobStatus.FAILED + case "timeout": + return AsyncJobStatus.TIMED_OUT + case _: + raise ValueError(f"Unsupported CDK status {status}") def create_async_retriever( self, @@ -1207,13 +1229,12 @@ def create_async_retriever( download_requester = self._create_component_from_model(model=model.download_requester, decoder=decoder, config=config, name=name) status_extractor = self._create_component_from_model(model=model.status_extractor, decoder=decoder, config=config, name=name) urls_extractor = self._create_component_from_model(model=model.urls_extractor, decoder=decoder, config=config, name=name) - status_mapping = self._create_component_from_model(model=model.status_mapping, config=config, name=name) job_repository: AsyncJobRepository = AsyncHttpJobRepository( creation_requester=creation_requester, polling_requester=polling_requester, download_requester=download_requester, status_extractor=status_extractor, - status_mapping=status_mapping.parse_input(), + status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), urls_extractor=urls_extractor, ) job_orchestrator_factory = lambda stream_slices: AsyncJobOrchestrator(job_repository, stream_slices) From 8a66b337bc0058cc5bf08d8292058310a3015e4b Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 14:59:44 -0400 Subject: [PATCH 05/18] Fix on status mapping --- .../sources/declarative/parsers/model_to_component_factory.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index d047506b408d..3154ddd9b643 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1181,6 +1181,10 @@ def _create_async_job_status_mapping( ) -> Mapping[str, AsyncJobStatus]: api_status_to_cdk_status = {} for cdk_status, api_statuses in model.dict().items(): + if cdk_status == "type": + # This is an element of the dict because of the typing of the CDK but it is not a CDK status + continue + for status in api_statuses: if status in api_status_to_cdk_status: raise ValueError( From 6d92f1387b5b1f75303aae8b65d01248a1e9a21b Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 16:14:44 -0400 Subject: [PATCH 06/18] Fix mypy --- .../declarative/extractors/http_selector.py | 1 - 
.../declarative/retrievers/async_retriever.py | 14 +------------- .../sources/declarative/retrievers/retriever.py | 1 - .../declarative/transformations/remove_fields.py | 2 -- airbyte-cdk/python/pyproject.toml | 2 +- 5 files changed, 2 insertions(+), 18 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py index e70ac150564c..b6da9c2556fb 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -10,7 +10,6 @@ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState -@dataclass class HttpSelector: """ Responsible for translating an HTTP response into a list of records by extracting records from the response and optionally filtering diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 018bda819bd7..7f04ca87ea88 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -22,9 +22,7 @@ class AsyncRetriever(Retriever): parameters: InitVar[Mapping[str, Any]] job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator] record_selector: RecordSelector - name: str # type: ignore _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="") - primary_key: Optional[Union[str, List[str], List[List[str]]]] # type: ignore _primary_key: str = field(init=False, repr=False, default="") stream_slicer: StreamSlicer = field(default_factory=lambda: SinglePartitionRouter(parameters={})) @@ -41,21 +39,11 @@ def name(self) -> str: """ return str(self._name.eval(self.config)) if isinstance(self._name, InterpolatedString) else self._name - @name.setter - def name(self, value: str) -> None: - if not isinstance(value, property): - self._name = value - @property def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: """The stream's primary key""" return self._primary_key - @primary_key.setter - def primary_key(self, value: str) -> None: - if not isinstance(value, property): - self._primary_key = value - @property def state(self) -> StreamState: """ @@ -110,7 +98,7 @@ def _validate_and_get_stream_slice_partition(self, stream_slice: Optional[Stream message="Invalid arguments to AsyncJobRetriever.read_records: stream_slice is no optional. Please contact Airbyte Support", failure_type=FailureType.system_error, ) - return stream_slice["partition"] + return stream_slice["partition"] # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices def stream_slices(self) -> Iterable[Optional[StreamSlice]]: slices = self.stream_slicer.stream_slices() diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index ddab62222694..1804cdbf5686 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -11,7 +11,6 @@ from airbyte_cdk.sources.types import StreamState -@dataclass class Retriever: """ Responsible for fetching a stream's records from an HTTP API source. 
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py index 16c62042ef2f..658d5dd2ccdb 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -68,5 +68,3 @@ def transform( except dpath.exceptions.PathNotFound: # if the (potentially nested) property does not exist, silently skip pass - - return record diff --git a/airbyte-cdk/python/pyproject.toml b/airbyte-cdk/python/pyproject.toml index 2c5d56c676fd..6a4cd1a3d385 100644 --- a/airbyte-cdk/python/pyproject.toml +++ b/airbyte-cdk/python/pyproject.toml @@ -35,6 +35,7 @@ isodate = "~0.6.1" Jinja2 = "~3.1.2" jsonref = "~0.2" jsonschema = "~3.2.0" +pandas = "2.2.0" pendulum = "<3.0.0" pydantic = "^2.7" pyrate-limiter = "~3.1.0" @@ -51,7 +52,6 @@ langchain = { version = "0.1.16", optional = true } langchain_core = { version = "0.1.42", optional = true } markdown = { version = "*", optional = true } openai = { version = "0.27.9", extras = ["embeddings"], optional = true } -pandas = { version = "2.2.0", optional = true } pdf2image = { version = "1.16.3", optional = true } "pdfminer.six" = { version = "20221105", optional = true } pyarrow = { version = "~15.0.0", optional = true } From dabab729d78a5c8143607ec2eaa09fb227cab0cd Mon Sep 17 00:00:00 2001 From: maxi297 Date: Thu, 5 Sep 2024 16:27:50 -0400 Subject: [PATCH 07/18] Update lock file --- airbyte-cdk/python/poetry.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/airbyte-cdk/python/poetry.lock b/airbyte-cdk/python/poetry.lock index 4564f7891812..4617334a85d1 100644 --- a/airbyte-cdk/python/poetry.lock +++ b/airbyte-cdk/python/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -1961,8 +1961,8 @@ files = [ httpx = ">=0.23.0,<1" orjson = ">=3.9.14,<4.0.0" pydantic = [ - {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""}, + {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""}, ] requests = ">=2,<3" @@ -2668,7 +2668,7 @@ twitter = ["twython"] name = "numpy" version = "1.26.4" description = "Fundamental package for array computing in Python" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, @@ -2835,7 +2835,7 @@ files = [ name = "pandas" version = "2.2.0" description = "Powerful data structures for data analysis, time series, and statistics" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "pandas-2.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8108ee1712bb4fa2c16981fba7e68b3f6ea330277f5ca34fa8d557e986a11670"}, @@ -2871,9 +2871,9 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, {version = ">=1.22.4,<2", markers = "python_version < \"3.11\""}, {version = ">=1.23.2,<2", markers = "python_version == \"3.11\""}, - {version = ">=1.26.0,<2", markers = "python_version >= \"3.12\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -3306,8 +3306,8 @@ files = [ annotated-types = ">=0.4.0" pydantic-core = "2.20.1" typing-extensions = [ - {version = ">=4.6.1", markers = "python_version < \"3.13\""}, {version = ">=4.12.2", markers = "python_version >= \"3.13\""}, + {version = ">=4.6.1", markers = "python_version < \"3.13\""}, ] [package.extras] @@ -4959,7 +4959,7 @@ typing-extensions = ">=3.7.4" name = "tzdata" version = "2024.1" description = "Provider of IANA time zone data" -optional = true +optional = false python-versions = ">=2" files = [ {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, @@ -5404,4 +5404,4 @@ vector-db-based = ["cohere", "langchain", "openai", "tiktoken"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "1759d8574c392cf39fccff997263873168087159c5f741314ceff6db4e5a32af" +content-hash = "1df63da7ed8c114e2732cbe566cf45b102edfbcf4aba88aa1be5fd505addd54b" From 939fc0065cbf676b67754684b2c3a975117f1f74 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 6 Sep 2024 09:21:40 -0400 Subject: [PATCH 08/18] Fix linting --- .../sources/declarative/extractors/http_selector.py | 1 - .../extractors/response_to_file_extractor.py | 1 - .../parsers/model_to_component_factory.py | 11 +++++------ .../sources/declarative/retrievers/retriever.py | 1 - .../transformations/keys_to_lower_transformation.py | 13 +++---------- .../sources/declarative/job/test_integration.py | 12 +++++------- .../declarative/job/test_job_orchestrator.py | 3 --- 7 files changed, 13 insertions(+), 29 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py index b6da9c2556fb..905477a6c6d9 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -3,7 +3,6 @@ # from abc import abstractmethod -from dataclasses import dataclass from typing import Any, 
Iterable, Mapping, Optional import requests diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index 8e8dcf407fb9..93cb0bd1b07e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -6,7 +6,6 @@ import uuid import zlib from contextlib import closing -from dataclasses import dataclass from typing import Any, Dict, Iterable, Mapping, Optional, Tuple import pandas as pd diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 3154ddd9b643..de1d4dc7c1bd 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -11,6 +11,9 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Type, Union, get_args, get_origin, get_type_hints from airbyte_cdk.models import FailureType, Level +from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator +from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator, NoAuth from airbyte_cdk.sources.declarative.auth.jwt import JwtAlgorithm @@ -126,6 +129,7 @@ WaitTimeFromHeaderBackoffStrategy, WaitUntilTimeFromHeaderBackoffStrategy, ) +from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository from airbyte_cdk.sources.declarative.requesters.paginators import DefaultPaginator, NoPagination, PaginatorTestReadDecorator from airbyte_cdk.sources.declarative.requesters.paginators.strategies import ( CursorPaginationStrategy, @@ -154,11 +158,6 @@ ComponentDefinition = Mapping[str, Any] -from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator -from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository -from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus -from airbyte_cdk.sources.declarative.requesters.http_job_repository import AsyncHttpJobRepository - class ModelToComponentFactory: def __init__( @@ -300,7 +299,7 @@ def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any return AddFields(fields=added_field_definitions, parameters=model.parameters or {}) def create_keys_to_lower_transformation(self, model: KeysToLowerModel, config: Config, **kwargs: Any) -> KeysToLowerTransformation: - return KeysToLowerTransformation(parameters=model.parameters or {}) + return KeysToLowerTransformation() @staticmethod def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py index 1804cdbf5686..155de5782aa0 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ 
b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -3,7 +3,6 @@ # from abc import abstractmethod -from dataclasses import dataclass from typing import Any, Iterable, Mapping, Optional from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py index e29bf10f10c6..97e4fccd804e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -2,23 +2,16 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # -from dataclasses import InitVar, dataclass, field -from typing import Any, Dict, List, Mapping, Optional, Type, Union +from dataclasses import dataclass +from typing import Any, Dict, Optional -import dpath -from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.types import Config, FieldPointer, Record, StreamSlice, StreamState +from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass class KeysToLowerTransformation(RecordTransformation): - parameters: InitVar[Mapping[str, Any]] - - def __post_init__(self, parameters: Mapping[str, Any]) -> None: - pass # parameters does not need to be considered here - def transform( self, record: Dict[str, Any], diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py index bbf80c59c294..4d0036dbe5b0 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py @@ -49,10 +49,6 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification: return ConnectorSpecification(connectionSpecification={}) def streams(self, config: Mapping[str, Any]) -> List[Stream]: - job_orchestrator_factory_fn = lambda stream_slices: AsyncJobOrchestrator( - MockAsyncJobRepository(), stream_slices, - ) - noop_record_selector = RecordSelector( extractor=_EXTRACTOR_NOT_USED, config={}, @@ -70,7 +66,9 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: parameters={}, record_selector=noop_record_selector, stream_slicer=self._stream_slicer, - job_orchestrator_factory=job_orchestrator_factory_fn, + job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( + MockAsyncJobRepository(), stream_slices, + ), ), config={}, parameters={}, @@ -94,8 +92,8 @@ def setUp(self) -> None: def test_when_read_then_return_records_from_repository(self) -> None: output = read( - self._source, - self._CONFIG, + self._source, + self._CONFIG, CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() ) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py index c83f50ceeb17..de76bc7f7346 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py @@ -54,8 +54,6 @@ def _update_status(jobs: Set[AsyncJob]) -> 
None: return _update_status - - sleep_mock_target = "airbyte_cdk.sources.declarative.async_job.job_orchestrator.time.sleep" class AsyncJobOrchestratorTest(TestCase): @@ -129,7 +127,6 @@ def test_given_failure_when_create_and_get_completed_partitions_then_raise_excep list(orchestrator.create_and_get_completed_partitions()) assert self._job_repository.start.call_args_list == [call(_A_STREAM_SLICE)] * 4 - def test_when_fetch_records_then_yield_records_from_each_job(self) -> None: self._job_repository.fetch_records.return_value = [_ANY_RECORD] orchestrator = self._orchestrator([_A_STREAM_SLICE]) From 8ca5779034e83bb479429874b306811b38e0f07e Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 6 Sep 2024 09:27:55 -0400 Subject: [PATCH 09/18] format --- .../declarative/transformations/keys_to_lower_transformation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py index 97e4fccd804e..53db3d49abd4 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -11,7 +11,6 @@ @dataclass class KeysToLowerTransformation(RecordTransformation): - def transform( self, record: Dict[str, Any], From 66bbf760090589bfb5cf4c6ac327646a7e364ffe Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 6 Sep 2024 09:48:21 -0400 Subject: [PATCH 10/18] Ensure logging --- .../declarative/extractors/response_to_file_extractor.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index 93cb0bd1b07e..d401c3fd27d4 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -68,8 +68,7 @@ def _filter_null_bytes(self, b: bytes) -> bytes: res = b.replace(b"\x00", b"") if len(res) < len(b): - pass - # FIXME self.logger.warning("Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)) + self.logger.warning("Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)) return res def _save_to_file(self, response: Optional[requests.Response] = None) -> Tuple[str, str]: From 0be8bd6ca8502eb90b48e31ff131beecb3ae442e Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 6 Sep 2024 14:12:00 -0400 Subject: [PATCH 11/18] Improve logging and fix constructor --- .../sources/declarative/async_job/job.py | 3 +++ .../parsers/model_to_component_factory.py | 2 -- .../declarative/retrievers/async_retriever.py | 15 --------------- 3 files changed, 3 insertions(+), 17 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py index 3f2285f27494..a991c9fc790a 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py @@ -43,3 +43,6 @@ def update_status(self, status: AsyncJobStatus) -> None: self._timer.stop() self._status = status + + def __repr__(self) -> str: + return f"AsyncJob(data={self.api_job_id()}, 
job_parameters={self.job_parameters()}, status={self.status()})" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index de1d4dc7c1bd..a3e4aa79d4f7 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1243,8 +1243,6 @@ def create_async_retriever( job_orchestrator_factory = lambda stream_slices: AsyncJobOrchestrator(job_repository, stream_slices) return AsyncRetriever( - name=name, - primary_key=primary_key, job_orchestrator_factory=job_orchestrator_factory, record_selector=record_selector, stream_slicer=stream_slicer, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 7f04ca87ea88..2a49aba99d89 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -22,27 +22,12 @@ class AsyncRetriever(Retriever): parameters: InitVar[Mapping[str, Any]] job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator] record_selector: RecordSelector - _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="") - _primary_key: str = field(init=False, repr=False, default="") stream_slicer: StreamSlicer = field(default_factory=lambda: SinglePartitionRouter(parameters={})) def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._job_orchestrator_factory = self.job_orchestrator_factory self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None self._parameters = parameters - self._name = InterpolatedString(self._name, parameters=parameters) if isinstance(self._name, str) else self._name - - @property - def name(self) -> str: - """ - :return: Stream name - """ - return str(self._name.eval(self.config)) if isinstance(self._name, InterpolatedString) else self._name - - @property - def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]: - """The stream's primary key""" - return self._primary_key @property def state(self) -> StreamState: From 07b24c2971c0190ca2462f3b2f1424f132e200bb Mon Sep 17 00:00:00 2001 From: maxi297 Date: Fri, 6 Sep 2024 14:43:11 -0400 Subject: [PATCH 12/18] fix test --- .../unit_tests/sources/declarative/job/test_integration.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py index 4d0036dbe5b0..fa608bbd16fd 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py @@ -60,8 +60,6 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: return [ DeclarativeStream( retriever=AsyncRetriever( - name="test_async_retriever", - primary_key="id", config={}, parameters={}, record_selector=noop_record_selector, From 8bccc8e4620642d892964fabe7f07bfdee7074fb Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 9 Sep 2024 11:32:52 -0400 Subject: [PATCH 13/18] code review --- airbyte-cdk/python/airbyte_cdk/logger.py | 10 +++- .../sources/declarative/async_job/job.py | 4 +- .../declarative/async_job/job_orchestrator.py | 24 ++++---- 
.../declarative/async_job/repository.py | 1 - .../sources/declarative/async_job/status.py | 21 +++++-- .../declarative_component_schema.yaml | 2 +- .../extractors/response_to_file_extractor.py | 38 ++++++------ .../parsers/model_to_component_factory.py | 12 +++- .../requesters/http_job_repository.py | 14 ++++- .../{job => async_job}/__init__.py | 0 .../{job => async_job}/test_integration.py | 0 .../test_job_orchestrator.py | 0 .../extractors/compressed_response | Bin 0 -> 1129 bytes .../extractors/decompressed_response.csv | 25 ++++++++ .../test_response_to_file_extractor.py | 54 ++++++++++++++++++ 15 files changed, 162 insertions(+), 43 deletions(-) rename airbyte-cdk/python/unit_tests/sources/declarative/{job => async_job}/__init__.py (100%) rename airbyte-cdk/python/unit_tests/sources/declarative/{job => async_job}/test_integration.py (100%) rename airbyte-cdk/python/unit_tests/sources/declarative/{job => async_job}/test_job_orchestrator.py (100%) create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/extractors/compressed_response create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/extractors/decompressed_response.csv create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py diff --git a/airbyte-cdk/python/airbyte_cdk/logger.py b/airbyte-cdk/python/airbyte_cdk/logger.py index 72673dcfa8f7..2dbf9d40abd7 100644 --- a/airbyte-cdk/python/airbyte_cdk/logger.py +++ b/airbyte-cdk/python/airbyte_cdk/logger.py @@ -5,7 +5,7 @@ import json import logging import logging.config -from typing import Any, Mapping, Optional, Tuple +from typing import Any, Callable, Mapping, Optional, Tuple from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteMessageSerializer, Level, Type from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets @@ -38,6 +38,14 @@ def init_logger(name: Optional[str] = None) -> logging.Logger: return logger +def lazy_log(logger: logging.Logger, level: int, lazy_log_provider: Callable[[], str]) -> None: + """ + This method ensures that the processing of the log message is only done if the logger is enabled for the log level. + """ + if logger.isEnabledFor(level): + logger.log(level, lazy_log_provider()) + + class AirbyteLogFormatter(logging.Formatter): """Output log records using AirbyteMessage""" diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py index a991c9fc790a..09a527b0bcb0 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job.py @@ -12,6 +12,8 @@ class AsyncJob: """ + Description of an API job. + Note that the timer will only stop once `update_status` is called so the job might be completed on the API side but until we query for it and call `ApiJob.update_status`, `ApiJob.status` will not reflect the actual API side status.
""" @@ -39,7 +41,7 @@ def job_parameters(self) -> StreamSlice: def update_status(self, status: AsyncJobStatus) -> None: if self._status != AsyncJobStatus.RUNNING and status == AsyncJobStatus.RUNNING: self._timer.start() - elif status in [AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT, AsyncJobStatus.COMPLETED]: + elif status.is_terminal(): self._timer.stop() self._status = status diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py index ff4c37c4f216..ec2bafdde2f1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py @@ -5,6 +5,7 @@ from typing import Any, Generator, Iterable, List, Mapping, Optional, Set from airbyte_cdk import StreamSlice +from airbyte_cdk.logger import lazy_log from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job import AsyncJob from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository @@ -94,8 +95,8 @@ def _replace_failed_jobs(self, partition: AsyncPartition) -> None: def _start_jobs(self) -> None: """ - Start the jobs for each slice in the slice iterator. - This method iterates over the slice iterator and starts a job for each slice. + Retry failed jobs and start jobs for each slice in the slice iterator. + This method iterates over the running jobs and slice iterator and starts a job for each slice. The started jobs are added to the running partitions. Returns: None @@ -139,25 +140,26 @@ def _wait_on_status_update(self) -> None: Returns: None """ - self._debug_log(f"Polling status in progress. There are currently {len(self._running_partitions)} running partitions.") + lazy_log( + LOGGER, + logging.DEBUG, + lambda: f"Polling status in progress. There are currently {len(self._running_partitions)} running partitions.", + ) # wait only when there are running partitions if self._running_partitions: - self._debug_log(f"Waiting for {self._WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS} seconds before next poll...") + lazy_log( + LOGGER, + logging.DEBUG, + lambda: f"Waiting for {self._WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS} seconds before next poll...", + ) time.sleep(self._WAIT_TIME_BETWEEN_STATUS_UPDATE_IN_SECONDS) - def _debug_log(self, message: str) -> None: - # if statement in order to avoid string formatting if we're not in debug mode - if LOGGER.isEnabledFor(logging.DEBUG): - LOGGER.debug(message) - def _process_completed_partition(self, partition: AsyncPartition) -> None: """ Process a completed partition. Args: partition (AsyncPartition): The completed partition to process. - Returns: - AsyncPartition: The processed partition. 
""" job_ids = list(map(lambda job: job.api_job_id(), {job for job in partition.jobs})) LOGGER.info(f"The following jobs for stream slice {partition.stream_slice} have been completed: {job_ids}.") diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py index e536eeae039c..77d7082bbb9e 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py @@ -8,7 +8,6 @@ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob -@dataclass class AsyncJobRepository: @abstractmethod def start(self, stream_slice: StreamSlice) -> AsyncJob: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py index ece158d11041..586e79889ca1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/status.py @@ -3,9 +3,22 @@ from enum import Enum +_TERMINAL = True + class AsyncJobStatus(Enum): - RUNNING = "RUNNING" - COMPLETED = "COMPLETED" - FAILED = "FAILED" - TIMED_OUT = "TIMED_OUT" + RUNNING = ("RUNNING", not _TERMINAL) + COMPLETED = ("COMPLETED", _TERMINAL) + FAILED = ("FAILED", _TERMINAL) + TIMED_OUT = ("TIMED_OUT", _TERMINAL) + + def __init__(self, value: str, is_terminal: bool) -> None: + self._value = value + self._is_terminal = is_terminal + + def is_terminal(self) -> bool: + """ + A status is terminal when a job status can't be updated anymore. For example if a job is completed, it will stay completed but a + running job might because completed, failed or timed out. + """ + return self._is_terminal diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml index 90fcf9279434..a8e5b4e155f6 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml @@ -1688,7 +1688,7 @@ definitions: type: string enum: [JsonlDecoder] KeysToLower: - title: Keys to Lower Fields + title: Keys to Lower Case description: A transformation that renames all keys to lower case. type: object required: diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index d401c3fd27d4..a177ee65240f 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -71,7 +71,7 @@ def _filter_null_bytes(self, b: bytes) -> bytes: self.logger.warning("Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)) return res - def _save_to_file(self, response: Optional[requests.Response] = None) -> Tuple[str, str]: + def _save_to_file(self, response: requests.Response) -> Tuple[str, str]: """ Saves the binary data from the given response to a temporary file and returns the filepath and response encoding. 
@@ -86,27 +86,27 @@ def _save_to_file(self, response: Optional[requests.Response] = None) -> Tuple[s """ # set filepath for binary data from response decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32) - - if response: - tmp_file = str(uuid.uuid4()) - with closing(response) as response, open(tmp_file, "wb") as data_file: - response_encoding = self._get_response_encoding(dict(response.headers or {})) - for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): - try: + needs_decompression = True # we will assume at first that the response is compressed and change the flag if not + + tmp_file = str(uuid.uuid4()) + with closing(response) as response, open(tmp_file, "wb") as data_file: + response_encoding = self._get_response_encoding(dict(response.headers or {})) + for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): + try: + if needs_decompression: data_file.write(decompressor.decompress(chunk)) - except zlib.error: - # we bypass having the context of the error here, - # since it's just a flag-type exception to handle a different scenario. + needs_decompression = True + else: data_file.write(self._filter_null_bytes(chunk)) + except zlib.error: + data_file.write(self._filter_null_bytes(chunk)) + needs_decompression = False - # check the file exists - if os.path.isfile(tmp_file): - return tmp_file, response_encoding - else: - raise ValueError(f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist.") - - # return default values - return EMPTY_STR, EMPTY_STR + # check the file exists + if os.path.isfile(tmp_file): + return tmp_file, response_encoding + else: + raise ValueError(f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist.") def _read_with_chunks(self, path: str, file_encoding: str, chunk_size: int = 100) -> Iterable[Mapping[str, Any]]: """ diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index a3e4aa79d4f7..e1cf7e4b7806 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1227,9 +1227,15 @@ def create_async_retriever( client_side_incremental_sync=client_side_incremental_sync, ) stream_slicer = stream_slicer or SinglePartitionRouter(parameters={}) - creation_requester = self._create_component_from_model(model=model.creation_requester, decoder=decoder, config=config, name=name) - polling_requester = self._create_component_from_model(model=model.polling_requester, decoder=decoder, config=config, name=name) - download_requester = self._create_component_from_model(model=model.download_requester, decoder=decoder, config=config, name=name) + creation_requester = self._create_component_from_model( + model=model.creation_requester, decoder=decoder, config=config, name=f"job creation - {name}" + ) + polling_requester = self._create_component_from_model( + model=model.polling_requester, decoder=decoder, config=config, name=f"job polling - {name}" + ) + download_requester = self._create_component_from_model( + model=model.download_requester, decoder=decoder, config=config, name=f"job download - {name}" + ) status_extractor = self._create_component_from_model(model=model.status_extractor, decoder=decoder, config=config, name=name) urls_extractor = self._create_component_from_model(model=model.urls_extractor, decoder=decoder, 
config=config, name=name) job_repository: AsyncJobRepository = AsyncHttpJobRepository( diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index 5b26c11773a3..1df8dabee243 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -1,11 +1,11 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. - - +import logging import uuid from dataclasses import dataclass from typing import Any, Dict, Iterable, Mapping, Optional import requests +from airbyte_cdk.logger import lazy_log from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job import AsyncJob from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository @@ -17,6 +17,8 @@ from airbyte_cdk.utils import AirbyteTracedException from requests import Response +LOGGER = logging.getLogger("airbyte") + @dataclass class AsyncHttpJobRepository(AsyncJobRepository): @@ -122,6 +124,10 @@ def update_jobs_status(self, jobs: Iterable[AsyncJob]) -> None: """ Updates the status of multiple jobs. + Because we don't have interpolation on arbitrary fields, we have this hack, which consists of using the stream_slice to allow for + interpolation. We are looking at enabling interpolation on more fields, which would require a change to these three layers: + HttpRequester, RequestOptionProvider, RequestInputProvider. + Args: jobs (Iterable[AsyncJob]): An iterable of AsyncJob objects representing the jobs to update. @@ -135,6 +141,10 @@ def update_jobs_status(self, jobs: Iterable[AsyncJob]) -> None: ) polling_response: requests.Response = self._get_validated_polling_response(stream_slice) job_status: AsyncJobStatus = self._get_validated_job_status(polling_response) + + if job_status != job.status(): + lazy_log(LOGGER, logging.DEBUG, lambda: f"Status of job {job.api_job_id()} changed from {job.status()} to {job_status}") + job.update_status(job_status) if job_status == AsyncJobStatus.COMPLETED: self._polling_job_response_by_id[job.api_job_id()] = polling_response diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/__init__.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/__init__.py similarity index 100% rename from airbyte-cdk/python/unit_tests/sources/declarative/job/__init__.py rename to airbyte-cdk/python/unit_tests/sources/declarative/async_job/__init__.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_integration.py similarity index 100% rename from airbyte-cdk/python/unit_tests/sources/declarative/job/test_integration.py rename to airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_integration.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py similarity index 100% rename from airbyte-cdk/python/unit_tests/sources/declarative/job/test_job_orchestrator.py rename to airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/compressed_response b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/compressed_response new file mode 100644 index
0000000000000000000000000000000000000000..da79e347f053282a05161fd1c39ef09f80e3d11c GIT binary patch literal 1129 zcmV-v1eW_BiwFP!000001Fct0Z`(!?z57=bJZp@d+1ZcL$%@Q0LLJGK=oalo*iYAW z92ad;p!xL$snmfO7VUw$X!%5PAKtv#H)`6qtIbWbXkKsjhwivty=|LCbF=!gUR_`B z+r!~_v)Q&s90SxWn)RmpsaZ6KZq>EN+x_l`&34@$_wBcv-L_dYx4T2Px;d_Q*Zu3( zySr_-@9$sTbnSlIKQnMR^tZm*ZQJAa?(M5~-z=IR->kaB>h`v0wAuDgzh14|SG(PJ z&7!&6Zoa>3|6#e_x2vwbKCZg{r`zi<7wg@&Tdlj}=DN9PYIrQi@)RE49Z_Aq>lc@) ze{Xm!-=ydFA42-j?-u^}@IE}1X3>8dBF%VO50{D%6oJAHX{pVU=;vkvHrT@@6rW7; zP{kEs6+C!h1CByUfsLysjF`t7 zsoturMv~3POsffIcB#~mSmUaJPc2;``n9x0#;oa6gmZ5V+KJ(P;2oirW{4*ksfY7 zJ*JO#Gr)ySsGbCJ;R4Lrf+Q~xqe?Cnc0%JcH1jc=cDEUU15+c4y<*_qA{_S@};pp{_oDBNkK?2afq`RbYh|nb4hHlLS}`)Die5(8FbO9_#j+uKLCV3A9A?N`s)5fs&`0} zAD+U?*Mv9%%6CeZiq2;67nqmkWZ>!|>Pley?b%ZA)IR#PiqV%4OCCik&KF%Y-9u*Z zak8mq(H4p(2kdiz5S#^1tejxlmOywcaH02UWWsvGCFh_<^GC@vgvWb;V49&&g@Z|Ns*$w~zJX0{1 literal 0 HcmV?d00001 diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/decompressed_response.csv b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/decompressed_response.csv new file mode 100644 index 000000000000..ebef74b8ec70 --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/decompressed_response.csv @@ -0,0 +1,25 @@ +"EMAIL","FIRST_NAME","LAST_NAME","ADDRESS_LINE_1","ADDRESS_LINE_2","CITY","STATE_PROVINCE_REGION","POSTAL_CODE","COUNTRY","ALTERNATE_EMAILS","PHONE_NUMBER","WHATSAPP","LINE","FACEBOOK","UNIQUE_NAME","CREATED_AT","UPDATED_AT","CONTACT_ID" +"fake_email_10@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:22Z","2021-02-01T12:35:51Z","eae8c5c8-f97e-40a8-8945-72acca457f5a" +"fake_email_1@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:08Z","2021-02-01T12:35:38Z","198f959f-f441-4d15-a280-9e8f65a90ba5" +"fake_email_12@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:24Z","2021-02-01T12:35:53Z","6975b74c-bb1e-4d54-a251-b934c4193ed4" +"fake_email_8@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:19Z","2021-02-01T12:35:49Z","36ef1a2d-3cc4-4515-9c00-1615c5f860d0" +"fake_email_18@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:30Z","2021-02-01T12:36:00Z","19163421-bb29-495d-950f-edede6218081" +"fake_email_3@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:14Z","2021-02-01T12:35:43Z","d1211b88-e116-4a0b-a823-0361bf059a06" +"fake_email_9@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:21Z","2021-02-01T12:35:50Z","ef4225b0-dff9-4756-af87-c4228d836d53" +"fake_email_4@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:15Z","2021-02-01T12:35:44Z","9adef36c-fe51-421a-9653-6bd010962e98" +"fake_email_2@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:13Z","2021-02-01T12:35:42Z","210d8004-d12a-4f01-815a-f90cfa9e4360" +"fake_email_6@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:17Z","2021-02-01T12:35:46Z","76330f89-5645-4432-b3bb-9e33a9195273" +"fake_email_14@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:26Z","2021-02-01T12:35:55Z","77200269-0b69-462c-bed1-9e6f912d4b83" +"fake_email_13@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:25Z","2021-02-01T12:35:54Z","c91c993b-1dfa-4686-bcf0-31e4aeb2a1a9" +"joepogbm@ggma.co",,,,,,,,,,,,,,,"2021-02-03T19:26:52Z","2021-02-03T19:27:21Z","a2a1f3f4-0170-4fbd-9152-ffe8cbcdb93d" +"fake_email_17@lmail.c","Fake 
contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:29Z","2021-02-01T12:35:59Z","e45af829-de4e-44d6-9c89-bb0c7ce47925" +"fake_email_15@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:27Z","2021-02-01T12:35:56Z","50b36a31-daf8-45c4-bc48-13e150f6746e" +"fake_email_7@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:18Z","2021-02-01T12:35:47Z","353113b9-b41e-480a-bf98-72213350194c" +"y.kurochkin@zazmic.com",,,,,,,,,,,,,,,"2021-02-03T19:34:41Z","2021-02-03T19:35:47Z","0b62947e-de93-419e-8c96-83572bf15ed1" +"fake_email_19@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:31Z","2021-02-01T12:36:01Z","9932d677-1128-47e4-9d97-667c6155bfee" +"joepogbum@ggma.co",,,,,,,,,,,,,,,"2021-02-03T19:22:41Z","2021-02-03T19:23:10Z","ba3c48d5-b63b-48e6-8687-c5034ed0a8dd" +"fake_email_0@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:34:49Z","2021-02-01T12:35:19Z","44ec451f-d401-40d2-831d-3e3ce8a94f66" +"avida.d3@gmail.com","dima","dima",,,,,,,,,,,,,"2021-09-08T09:02:22Z","2021-09-08T09:04:58Z","2f7b13f2-60d2-462a-bfb0-d30bb8eabed8" +"fake_email_16@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:28Z","2021-02-01T12:35:57Z","c6cfd936-e327-48da-aa76-824076461d80" +"fake_email_11@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:23Z","2021-02-01T12:35:52Z","4101feb2-2b07-4aef-8eb5-62878b612fcd" +"fake_email_5@lmail.c","Fake contact","Lastname",,,,,"22341",,,,,,,,"2021-02-01T12:35:16Z","2021-02-01T12:35:45Z","32deb20d-9f8f-44b4-aed2-dc15d5bf45ba" diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py new file mode 100644 index 000000000000..33ed74d395fd --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+from io import BytesIO +from pathlib import Path +from unittest import TestCase + +import requests +import requests_mock +from airbyte_cdk.sources.declarative.extractors import ResponseToFileExtractor + + +class ResponseToFileExtractorTest(TestCase): + def setUp(self) -> None: + self._extractor = ResponseToFileExtractor() + self._http_mocker = requests_mock.Mocker() + self._http_mocker.__enter__() + + def tearDown(self) -> None: + self._http_mocker.__exit__(None, None, None) + + def test_compressed_response(self) -> None: + response = self._mock_streamed_response_from_file(self._compressed_response_path()) + extracted_records = list(self._extractor.extract_records(response)) + assert len(extracted_records) == 24 + + def test_text_response(self) -> None: + response = self._mock_streamed_response_from_file(self._decompressed_response_path()) + extracted_records = list(self._extractor.extract_records(response)) + assert len(extracted_records) == 24 + + def test_text_response_with_null_bytes(self) -> None: + csv_with_null_bytes = '"FIRST_\x00NAME","LAST_NAME"\n"a first n\x00ame","a last na\x00me"\n' + response = self._mock_streamed_response(BytesIO(csv_with_null_bytes.encode("utf-8"))) + + extracted_records = list(self._extractor.extract_records(response)) + + assert extracted_records == [{"FIRST_NAME": "a first name", "LAST_NAME": "a last name"}] + + def _test_folder_path(self) -> Path: + return Path(__file__).parent.resolve() + + def _compressed_response_path(self) -> Path: + return self._test_folder_path() / "compressed_response" + + def _decompressed_response_path(self) -> Path: + return self._test_folder_path() / "decompressed_response.csv" + + def _mock_streamed_response_from_file(self, path: Path) -> requests.Response: + with path.open("rb") as f: + return self._mock_streamed_response(f) # type: ignore # Could not find the right typing for file io + + def _mock_streamed_response(self, io: BytesIO) -> requests.Response: + any_url = "https://anyurl.com" + self._http_mocker.register_uri("GET", any_url, [{"body": io, "status_code": 200}]) + return requests.get(any_url) From 68150eafdd83a54d7d5381fb629dfa8c62f78405 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 9 Sep 2024 11:40:12 -0400 Subject: [PATCH 14/18] mypy --- .../sources/declarative/parsers/model_to_component_factory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index e1cf7e4b7806..103650863ecb 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1211,11 +1211,11 @@ def create_async_retriever( config: Config, *, name: str, - primary_key: Optional[Union[str, List[str], List[List[str]]]], + primary_key: Optional[Union[str, List[str], List[List[str]]]], # this seems to be needed to match create_simple_retriever stream_slicer: Optional[StreamSlicer], client_side_incremental_sync: Optional[Dict[str, Any]] = None, transformations: List[RecordTransformation], - **kwargs, + **kwargs: Any, ) -> AsyncRetriever: decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) From cda584a93d460a48ec047fc6731b554bfaa031bd Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 9 Sep 2024 11:55:38 -0400 Subject: [PATCH 15/18] lint --- 
.../airbyte_cdk/sources/declarative/async_job/repository.py | 1 - .../sources/declarative/parsers/model_to_component_factory.py | 3 +-- .../sources/declarative/retrievers/async_retriever.py | 3 +-- .../sources/declarative/async_job/test_job_orchestrator.py | 1 + 4 files changed, 3 insertions(+), 5 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py index 77d7082bbb9e..2880fea16333 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/async_job/repository.py @@ -1,7 +1,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from abc import abstractmethod -from dataclasses import dataclass from typing import Any, Iterable, Mapping, Set from airbyte_cdk import StreamSlice diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 103650863ecb..ed0bde320cf1 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -1246,10 +1246,9 @@ def create_async_retriever( status_mapping=self._create_async_job_status_mapping(model.status_mapping, config), urls_extractor=urls_extractor, ) - job_orchestrator_factory = lambda stream_slices: AsyncJobOrchestrator(job_repository, stream_slices) return AsyncRetriever( - job_orchestrator_factory=job_orchestrator_factory, + job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(job_repository, stream_slices), record_selector=record_selector, stream_slicer=stream_slicer, config=config, diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 2a49aba99d89..4bb9421e44dc 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -2,12 +2,11 @@ from dataclasses import InitVar, dataclass, field -from typing import Any, Callable, Iterable, List, Mapping, Optional, Union +from typing import Any, Callable, Iterable, Mapping, Optional from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator, AsyncPartition from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector -from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter from airbyte_cdk.sources.declarative.retrievers import Retriever from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py index de76bc7f7346..5e2c3a51a5ab 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job_orchestrator.py @@ -56,6 +56,7 @@ def _update_status(jobs: Set[AsyncJob]) -> None: sleep_mock_target = "airbyte_cdk.sources.declarative.async_job.job_orchestrator.time.sleep" + 
class AsyncJobOrchestratorTest(TestCase): def setUp(self) -> None: self._job_repository = Mock(spec=AsyncJobRepository) From aaf551d97909b52966fab088b6678eee3c135827 Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 9 Sep 2024 12:27:22 -0400 Subject: [PATCH 16/18] Fix test and init of AsyncHttpJobRepository --- .../sources/declarative/requesters/http_job_repository.py | 4 ++-- .../transformations/test_keys_to_lower_transformation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index 1df8dabee243..ada35e9d5571 100644 --- a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -1,7 +1,7 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. import logging import uuid -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Any, Dict, Iterable, Mapping, Optional import requests @@ -29,7 +29,7 @@ class AsyncHttpJobRepository(AsyncJobRepository): status_mapping: Mapping[str, AsyncJobStatus] urls_extractor: DpathExtractor - record_extractor: RecordExtractor = ResponseToFileExtractor() + record_extractor: RecordExtractor = field(init=False, repr=False, default_factory=lambda: ResponseToFileExtractor()) def __post_init__(self) -> None: self._create_job_response_by_id: Dict[str, Response] = {} diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py index 591ad06f97c7..7464b9f04fd2 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/transformations/test_keys_to_lower_transformation.py @@ -9,5 +9,5 @@ def test_transform() -> None: record = {"wIth_CapITal": _ANY_VALUE, "anOThEr_witH_Caps": _ANY_VALUE} - KeysToLowerTransformation(parameters={}).transform(record) + KeysToLowerTransformation().transform(record) assert {"with_capital": _ANY_VALUE, "another_with_caps": _ANY_VALUE} From 035368d98e5ccbd7bb023f4c260a034911b82bdb Mon Sep 17 00:00:00 2001 From: maxi297 Date: Mon, 9 Sep 2024 14:48:10 -0400 Subject: [PATCH 17/18] adding missing test --- .../sources/declarative/async_job/test_job.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py new file mode 100644 index 000000000000..bae215e233fa --- /dev/null +++ b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024 Airbyte, Inc., all rights reserved. 
+ +import time +from datetime import timedelta +from unittest import TestCase + +from airbyte_cdk.sources.declarative.async_job.job import AsyncJob +from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus +from airbyte_cdk.sources.declarative.types import StreamSlice + +_AN_API_JOB_ID = "an api job id" +_ANY_STREAM_SLICE = StreamSlice(partition={}, cursor_slice={}) +_A_VERY_BIG_TIMEOUT = timedelta(days=999999999) +_IMMEDIATELY_TIMED_OUT = timedelta(microseconds=1) + + +class AsyncJobTest(TestCase): + def test_given_timer_is_not_out_when_status_then_return_actual_status(self) -> None: + job = AsyncJob(_AN_API_JOB_ID, _ANY_STREAM_SLICE, _A_VERY_BIG_TIMEOUT) + assert job.status() == AsyncJobStatus.RUNNING + + + def test_given_timer_is_out_when_status_then_return_timed_out(self) -> None: + job = AsyncJob(_AN_API_JOB_ID, _ANY_STREAM_SLICE, _IMMEDIATELY_TIMED_OUT) + time.sleep(0.001) + assert job.status() == AsyncJobStatus.TIMED_OUT + + def test_given_status_is_terminal_when_update_status_then_stop_timer(self) -> None: + """ + This test will become important once we print stats associated with jobs. For now, we stop the timer but do not return any + metrics regarding the timer, so it is not useful. + """ + pass From eb9bccd3d8ebb47bbb1e30e0777acb563e21fe8f Mon Sep 17 00:00:00 2001 From: maxi297 Date: Tue, 10 Sep 2024 08:14:50 -0400 Subject: [PATCH 18/18] lint --- .../python/unit_tests/sources/declarative/async_job/test_job.py | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py index bae215e233fa..6399433e4413 100644 --- a/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py +++ b/airbyte-cdk/python/unit_tests/sources/declarative/async_job/test_job.py @@ -19,7 +19,6 @@ def test_given_timer_is_not_out_when_status_then_return_actual_status(self) -> N job = AsyncJob(_AN_API_JOB_ID, _ANY_STREAM_SLICE, _A_VERY_BIG_TIMEOUT) assert job.status() == AsyncJobStatus.RUNNING - def test_given_timer_is_out_when_status_then_return_timed_out(self) -> None: job = AsyncJob(_AN_API_JOB_ID, _ANY_STREAM_SLICE, _IMMEDIATELY_TIMED_OUT) time.sleep(0.001)
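
A few standalone sketches of the patterns introduced in this patch series follow; they are illustrative examples only, not CDK code. First, the intended usage of the `lazy_log` helper added to `airbyte_cdk/logger.py` in PATCH 13. The local copy of `lazy_log` and the sample message are stand-ins so the snippet runs without the CDK installed.

import logging
from typing import Callable


def lazy_log(logger: logging.Logger, level: int, lazy_log_provider: Callable[[], str]) -> None:
    # Standalone mirror of airbyte_cdk.logger.lazy_log: the provider is only invoked
    # (and the message only formatted) when the logger is enabled for the level.
    if logger.isEnabledFor(level):
        logger.log(level, lazy_log_provider())


LOGGER = logging.getLogger("airbyte")
running_partitions = ["partition_1", "partition_2"]  # illustrative value only

# The lambda, and the f-string inside it, is skipped entirely unless DEBUG logging is enabled.
lazy_log(LOGGER, logging.DEBUG, lambda: f"Polling status in progress. There are currently {len(running_partitions)} running partitions.")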
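
Next, the `AsyncJobStatus` rework from PATCH 13 replaces list-membership checks such as `status in [FAILED, TIMED_OUT, COMPLETED]` with a per-member terminal flag. A standalone mirror of that pattern, with assertions added here purely for illustration:

from enum import Enum

_TERMINAL = True


class AsyncJobStatus(Enum):
    # Each member carries its name plus a flag saying whether the status can still change.
    RUNNING = ("RUNNING", not _TERMINAL)
    COMPLETED = ("COMPLETED", _TERMINAL)
    FAILED = ("FAILED", _TERMINAL)
    TIMED_OUT = ("TIMED_OUT", _TERMINAL)

    def __init__(self, value: str, is_terminal: bool) -> None:
        self._value = value
        self._is_terminal = is_terminal

    def is_terminal(self) -> bool:
        # A terminal status can't be updated anymore, which is what lets AsyncJob.update_status stop its timer.
        return self._is_terminal


assert not AsyncJobStatus.RUNNING.is_terminal()
assert all(status.is_terminal() for status in (AsyncJobStatus.COMPLETED, AsyncJobStatus.FAILED, AsyncJobStatus.TIMED_OUT))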
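
Finally, the `_save_to_file` rewrite in `ResponseToFileExtractor` assumes the downloaded payload is compressed and falls back to raw, null-byte-filtered chunks on the first `zlib.error`. A rough sketch of that fallback, using a made-up `decompress_or_passthrough` helper rather than the real extractor class:

import zlib
from typing import Iterable


def decompress_or_passthrough(chunks: Iterable[bytes]) -> bytes:
    # Assume the payload is gzip/zlib-compressed; on the first zlib.error, switch to
    # passing the raw chunks through (stripping null bytes, as the extractor does).
    decompressor = zlib.decompressobj(zlib.MAX_WBITS | 32)
    needs_decompression = True
    output = b""
    for chunk in chunks:
        try:
            if needs_decompression:
                output += decompressor.decompress(chunk)
            else:
                output += chunk.replace(b"\x00", b"")
        except zlib.error:
            output += chunk.replace(b"\x00", b"")
            needs_decompression = False
    return output


# Plain CSV bytes are not a valid zlib/gzip stream, so the first chunk raises zlib.error and is kept as-is.
assert decompress_or_passthrough([b'"A","B"\n', b'"1","2"\n']) == b'"A","B"\n"1","2"\n'
assert decompress_or_passthrough([zlib.compress(b"hello")]) == b"hello"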