diff --git a/ddtrace/internal/packages.py b/ddtrace/internal/packages.py new file mode 100644 index 00000000000..46504e8c3d1 --- /dev/null +++ b/ddtrace/internal/packages.py @@ -0,0 +1,34 @@ +import typing + + +Distribution = typing.NamedTuple("Distribution", [("name", str), ("version", str), ("path", str)]) + +_DISTRIBUTIONS = None # type: typing.Optional[typing.Set[Distribution]] + + +def get_distributions(): + # type: () -> typing.Set[Distribution] + """returns the name and version of all distributions in a python path""" + global _DISTRIBUTIONS + if _DISTRIBUTIONS is not None: + return _DISTRIBUTIONS + + try: + import importlib.metadata as importlib_metadata + except ImportError: + import importlib_metadata # type: ignore[no-redef] + + pkgs = set() + for dist in importlib_metadata.distributions(): + # Get the root path of all files in a distribution + path = str(dist.locate_file("")) + # PKG-INFO and/or METADATA files are parsed when dist.metadata is accessed + # Optimization: we should avoid accessing dist.metadata more than once + metadata = dist.metadata + name = metadata["name"] + version = metadata["version"] + if name and version: + pkgs.add(Distribution(path=path, name=name, version=version)) + + _DISTRIBUTIONS = pkgs + return _DISTRIBUTIONS diff --git a/ddtrace/internal/telemetry/data.py b/ddtrace/internal/telemetry/data.py index d79d2820eaf..de6afebcdbc 100644 --- a/ddtrace/internal/telemetry/data.py +++ b/ddtrace/internal/telemetry/data.py @@ -1,10 +1,12 @@ import platform import sys from typing import Dict +from typing import List from typing import Tuple from ddtrace.internal.compat import PY3 from ddtrace.internal.constants import DEFAULT_SERVICE_NAME +from ddtrace.internal.packages import get_distributions from ddtrace.internal.runtime.container import get_container_info from ddtrace.internal.utils.cache import cached @@ -58,6 +60,13 @@ def _get_application(key): } +def get_dependencies(): + # type: () -> List[Dict[str, str]] + """Returns a unique list of the names and versions of all installed packages""" + dependencies = {(dist.name, dist.version) for dist in get_distributions()} + return [{"name": name, "version": version} for name, version in dependencies] + + def get_application(service, version, env): """Creates a dictionary to store application data using ddtrace configurations and the System-Specific module""" # We cache the application dict to reduce overhead since service, version, or env configurations diff --git a/ddtrace/internal/telemetry/writer.py b/ddtrace/internal/telemetry/writer.py index 9228fd9cacb..4d6a0c2cded 100644 --- a/ddtrace/internal/telemetry/writer.py +++ b/ddtrace/internal/telemetry/writer.py @@ -18,6 +18,7 @@ from ..service import ServiceStatus from ..utils.time import StopWatch from .data import get_application +from .data import get_dependencies from .data import get_host_info @@ -187,12 +188,8 @@ def app_started_event(self): if self._forked: # app-started events should only be sent by the main process return - # pkg_resources import is inlined for performance reasons - # This import is an expensive operation - import pkg_resources - payload = { - "dependencies": [{"name": pkg.project_name, "version": pkg.version} for pkg in pkg_resources.working_set], + "dependencies": get_dependencies(), "integrations": self._flush_integrations_queue(), "configurations": [], } diff --git a/setup.py b/setup.py index 3955eac637b..9c07154513e 100644 --- a/setup.py +++ b/setup.py @@ -247,6 +247,7 @@ def get_exts_for(name): "attrs>=19.2.0", "six>=1.12.0", "typing_extensions; python_version<'3.8'", + "importlib_metadata; python_version<'3.8'", ] + bytecode, extras_require={ diff --git a/tests/internal/test_packages.py b/tests/internal/test_packages.py new file mode 100644 index 00000000000..82882fc7c12 --- /dev/null +++ b/tests/internal/test_packages.py @@ -0,0 +1,26 @@ +import os + +from ddtrace.internal.packages import get_distributions + + +def test_get_distributions(): + """use pkg_resources to validate package names and versions returned by get_distributions()""" + import pkg_resources + + pkg_resources_ws = {pkg.project_name for pkg in pkg_resources.working_set} + + importlib_pkgs = set() + for pkg in get_distributions(): + assert pkg.name + assert pkg.version + assert os.path.exists(pkg.path) + # The package name in typing_extensions-4.x.x.dist-info/METADATA is set to `typing_extensions` + # this is inconsistent with the package name found in pkg_resources. The block below corrects this. + # The correct package name is typing-extensions. + if pkg.name == "typing_extensions" and "typing-extensions" in pkg_resources_ws: + importlib_pkgs.add("typing-extensions") + else: + importlib_pkgs.add(pkg.name) + + # assert that pkg_resources and importlib.metadata return the same packages + assert pkg_resources_ws == importlib_pkgs diff --git a/tests/tracer/telemetry/test_data.py b/tests/tracer/telemetry/test_data.py index 9cfbbe6fe6b..1520e24bfd1 100644 --- a/tests/tracer/telemetry/test_data.py +++ b/tests/tracer/telemetry/test_data.py @@ -5,11 +5,13 @@ import pytest from ddtrace.internal.compat import PY3 +from ddtrace.internal.packages import get_distributions from ddtrace.internal.runtime.container import CGroupInfo from ddtrace.internal.telemetry.data import _format_version_info from ddtrace.internal.telemetry.data import _get_container_id from ddtrace.internal.telemetry.data import _get_os_version from ddtrace.internal.telemetry.data import get_application +from ddtrace.internal.telemetry.data import get_dependencies from ddtrace.internal.telemetry.data import get_host_info from ddtrace.internal.telemetry.data import get_hostname from ddtrace.internal.telemetry.data import get_version @@ -143,3 +145,10 @@ def test_get_container_id_when_container_does_not_exists(): with mock.patch("ddtrace.internal.telemetry.data.get_container_info") as gci: gci.return_value = None assert _get_container_id() == "" + + +def test_get_dependencies(): + """asserts that get_dependencies and get_distributions return the same packages""" + pkgs_as_dicts = {(dep["name"], dep["version"]) for dep in get_dependencies()} + pkgs_as_distributions = {(dist.name, dist.version) for dist in get_distributions()} + assert pkgs_as_dicts == pkgs_as_distributions diff --git a/tests/tracer/telemetry/test_writer.py b/tests/tracer/telemetry/test_writer.py index d1ea2883508..27bfa4d0994 100644 --- a/tests/tracer/telemetry/test_writer.py +++ b/tests/tracer/telemetry/test_writer.py @@ -2,10 +2,10 @@ import httpretty import mock -import pkg_resources import pytest from ddtrace.internal.telemetry.data import get_application +from ddtrace.internal.telemetry.data import get_dependencies from ddtrace.internal.telemetry.data import get_host_info from ddtrace.internal.telemetry.writer import TelemetryWriter from ddtrace.internal.telemetry.writer import get_runtime_id @@ -100,7 +100,7 @@ def test_app_started_event(mock_time, mock_send_request, telemetry_writer): assert headers["DD-Telemetry-Request-Type"] == "app-started" # validate request body payload = { - "dependencies": [{"name": pkg.project_name, "version": pkg.version} for pkg in pkg_resources.working_set], + "dependencies": get_dependencies(), "integrations": [ { "name": "integration-t",