style: reformat code (#5)

* style: reformat code
* style: reformat code
* style: reformat code
docarray · Dec 23, 2021 · d95d3ed · d95d3ed
1 parent 28a7e42
commit d95d3ed
Show file tree

Hide file tree

Showing 15 changed files with 101 additions and 40 deletions.
diff --git a/.github/requirements-cicd.txt b/.github/requirements-cicd.txt
@@ -12,3 +12,4 @@ matplotlib
 rich
 Pillow
 lz4
+fastapi
diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
@@ -42,8 +42,7 @@ jobs:
           python -m pip install wheel
           pip install -r .github/requirements-test.txt
           pip install -r .github/requirements-cicd.txt
-          pip install --no-cache-dir .
-          export JINA_LOG_LEVEL="ERROR"
+          pip install --no-cache-dir ".[full]"
       - name: Test
         id: test
         run: |

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -113,8 +113,7 @@ jobs:
           python -m pip install wheel
           pip install -r .github/requirements-test.txt
           pip install -r .github/requirements-cicd.txt
-          pip install --no-cache-dir .
-          export JINA_LOG_LEVEL="ERROR"
+          pip install --no-cache-dir ".[full]"
       - name: Test
         id: test
         run: |

diff --git a/docarray/array/mixins/getattr.py b/docarray/array/mixins/getattr.py
@@ -7,7 +7,7 @@
 class GetAttributeMixin:
     """Helpers that provide attributes getter in bulk """
 
-    def get_attributes(self, *fields: str) -> Union[List, List[List]]:
+    def get_attributes(self, *fields: str) -> List:
         """Return all nonempty values of the fields from all docs this array contains
 
         :param fields: Variable length argument with the name of the fields to extract
@@ -24,7 +24,7 @@ def get_attributes(self, *fields: str) -> Union[List, List[List]]:
     def get_attributes_with_docs(
         self,
         *fields: str,
-    ) -> Tuple[Union[List, List[List]], 'DocumentArray']:
+    ) -> Tuple[List, 'DocumentArray']:
         """Return all nonempty values of the fields together with their nonempty docs
 
         :param fields: Variable length argument with the name of the fields to extract

diff --git a/docarray/array/mixins/io/csv.py b/docarray/array/mixins/io/csv.py
@@ -101,6 +101,4 @@ def load_csv(
 
         from ....document.generators import from_csv
 
-        da = cls()
-        da.extend(from_csv(file, field_resolver=field_resolver))
-        return da
+        return cls(from_csv(file, field_resolver=field_resolver))
diff --git a/docarray/array/mixins/io/dataframe.py b/docarray/array/mixins/io/dataframe.py
@@ -21,7 +21,7 @@ def to_dataframe(self, **kwargs) -> 'DataFrame':
         """
         from pandas import DataFrame
 
-        return DataFrame.from_dict(self.to_list(), **kwargs)
+        return DataFrame.from_dict(self.to_list_safe(), **kwargs)
 
     @classmethod
     def from_dataframe(cls: Type['T'], df: 'DataFrame') -> 'T':

diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py
@@ -50,7 +50,17 @@ def load_json(cls: Type['T'], file: Union[str, TextIO]) -> 'T':
         with file_ctx as fp:
             return cls(constructor(v) for v in fp)
 
-    def to_list(self) -> List:
+    @classmethod
+    def from_json(cls: Type['T'], file: Union[str, TextIO]) -> 'T':
+        return cls.load_json(file)
+
+    @classmethod
+    def from_list_safe(cls: Type['T'], values: List) -> 'T':
+        from .... import Document
+
+        return cls(Document.from_dict(v) for v in values)
+
+    def to_list_safe(self) -> List:
         """Convert the object into a Python list.
 
         .. note::
@@ -65,4 +75,4 @@ def to_json(self) -> str:
 
         :return: a Python list
         """
-        return json.dumps(self.to_list())
+        return json.dumps(self.to_list_safe())
diff --git a/docarray/array/mixins/traverse.py b/docarray/array/mixins/traverse.py
@@ -96,7 +96,7 @@ def traverse_flat(
         self,
         traversal_paths: str,
         filter_fn: Optional[Callable[['Document'], bool]] = None,
-    ) -> Union['DocumentArray', Iterable['Document']]:
+    ) -> 'DocumentArray':
         """
         Returns a single flattened :class:``TraversableSequence`` with all Documents, that are reached
         via the ``traversal_paths``.

diff --git a/docarray/document/mixins/__init__.py b/docarray/document/mixins/__init__.py
@@ -7,12 +7,12 @@
 from .image import ImageDataMixin
 from .mesh import MeshDataMixin
 from .plot import PlotMixin
+from .porting import PortingMixin
 from .property import PropertyMixin
 from .protobuf import ProtobufMixin
 from .sugar import SingletonSugarMixin
 from .text import TextDataMixin
 from .video import VideoDataMixin
-from .porting import PortingMixin
 
 
 class AllMixins(

diff --git a/docarray/document/mixins/_property.py b/docarray/document/mixins/_property.py
@@ -19,6 +19,15 @@ def id(self) -> str:
     def id(self, value: str):
         self._data.id = value
 
+    @property
+    def parent_id(self) -> Optional[str]:
+        self._data._set_default_value_if_none('parent_id')
+        return self._data.parent_id
+
+    @parent_id.setter
+    def parent_id(self, value: str):
+        self._data.parent_id = value
+
     @property
     def granularity(self) -> Optional[int]:
         self._data._set_default_value_if_none('granularity')
@@ -37,15 +46,6 @@ def adjacency(self) -> Optional[int]:
     def adjacency(self, value: int):
         self._data.adjacency = value
 
-    @property
-    def parent_id(self) -> Optional[str]:
-        self._data._set_default_value_if_none('parent_id')
-        return self._data.parent_id
-
-    @parent_id.setter
-    def parent_id(self, value: str):
-        self._data.parent_id = value
-
     @property
     def buffer(self) -> Optional[bytes]:
         self._data._set_default_value_if_none('buffer')
@@ -64,6 +64,15 @@ def blob(self) -> Optional['ArrayType']:
     def blob(self, value: 'ArrayType'):
         self._data.blob = value
 
+    @property
+    def mime_type(self) -> Optional[str]:
+        self._data._set_default_value_if_none('mime_type')
+        return self._data.mime_type
+
+    @mime_type.setter
+    def mime_type(self, value: str):
+        self._data.mime_type = value
+
     @property
     def text(self) -> Optional[str]:
         self._data._set_default_value_if_none('text')
@@ -100,15 +109,6 @@ def uri(self) -> Optional[str]:
     def uri(self, value: str):
         self._data.uri = value
 
-    @property
-    def mime_type(self) -> Optional[str]:
-        self._data._set_default_value_if_none('mime_type')
-        return self._data.mime_type
-
-    @mime_type.setter
-    def mime_type(self, value: str):
-        self._data.mime_type = value
-
     @property
     def tags(self) -> Optional[Dict[str, 'StructValueType']]:
         self._data._set_default_value_if_none('tags')

diff --git a/docarray/document/mixins/plot.py b/docarray/document/mixins/plot.py
@@ -54,10 +54,10 @@ def _mermaid_to_url(self, img_type: str) -> str:
         """
         mermaid_str = (
             """
-                                                                        %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%%
-                                                                        classDiagram
-                    
-                                                                                """
+                                                                            %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%%
+                                                                            classDiagram
+                        
+                                                                                    """
             + self.__mermaid_str__()
         )
 

diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py
@@ -1,5 +1,5 @@
 import pickle
-from typing import Union, Optional, TYPE_CHECKING, Type, Dict
+from typing import Optional, TYPE_CHECKING, Type, Dict
 
 from ...helper import compress_bytes, decompress_bytes
 

diff --git a/setup.py b/setup.py
@@ -39,7 +39,21 @@
     long_description_content_type='text/markdown',
     zip_safe=False,
     setup_requires=['setuptools>=18.0', 'wheel'],
-    install_requires=['protobuf>=3.13.0', 'numpy', 'lz4'],
+    install_requires=['numpy'],
+    extras_require={
+        'full': [
+            'protobuf>=3.13.0',
+            'lz4',
+            'requests',
+            'matplotlib',
+            'Pillow',
+            'rich',
+            'trimesh',
+            'scipy',
+            'av',
+            'fastapi',
+        ]
+    },
     classifiers=[
         'Development Status :: 5 - Production/Stable',
         'Intended Audience :: Developers',

diff --git a/tests/unit/array/test_from_to_bytes.py b/tests/unit/array/test_from_to_bytes.py
@@ -1,20 +1,56 @@
+import numpy as np
 import pytest
+import tensorflow as tf
+import torch
+from scipy.sparse import csr_matrix, coo_matrix, bsr_matrix, csc_matrix
 
 from docarray import DocumentArray
+from docarray.math.ndarray import to_numpy_array
 from tests import random_docs
 
 
+def get_ndarrays_for_ravel():
+    a = np.random.random([100, 3])
+    a[a > 0.5] = 0
+    return [
+        (a, False),
+        (torch.tensor(a), False),
+        (tf.constant(a), False),
+        (torch.tensor(a).to_sparse(), True),
+        # (tf.sparse.from_dense(a), True),
+        (csr_matrix(a), True),
+        (bsr_matrix(a), True),
+        (coo_matrix(a), True),
+        (csc_matrix(a), True),
+    ]
+
+
+@pytest.mark.parametrize('ndarray_val, is_sparse', get_ndarrays_for_ravel())
 @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)])
 @pytest.mark.parametrize(
     'protocol', ['protobuf', 'protobuf-once', 'pickle', 'pickle-once']
 )
 @pytest.mark.parametrize('compress', ['lz4', 'bz2', 'lzma', 'zlib', 'gzip', None])
-def test_to_from_bytes(target_da, protocol, compress):
+def test_to_from_bytes(target_da, protocol, compress, ndarray_val, is_sparse):
     bstr = target_da.to_bytes(protocol=protocol, compress=compress)
     print(protocol, compress, len(bstr))
     da2 = DocumentArray.from_bytes(bstr, protocol=protocol, compress=compress)
     assert len(da2) == len(target_da)
 
+    target_da.embeddings = ndarray_val
+    target_da.blobs = ndarray_val
+    bstr = target_da.to_bytes(protocol=protocol, compress=compress)
+    print(protocol, compress, len(bstr))
+    da2 = DocumentArray.from_bytes(bstr, protocol=protocol, compress=compress)
+    assert len(da2) == len(target_da)
+
+    np.testing.assert_almost_equal(
+        to_numpy_array(target_da.embeddings), to_numpy_array(da2.embeddings)
+    )
+    np.testing.assert_almost_equal(
+        to_numpy_array(target_da.blobs), to_numpy_array(da2.blobs)
+    )
+
 
 @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)])
 @pytest.mark.parametrize(
@@ -37,3 +73,8 @@ def test_save_bytes(target_da, protocol, compress, tmpfile):
 @pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)])
 def test_from_to_protobuf(target_da):
     DocumentArray.from_protobuf(target_da.to_protobuf())
+
+
+@pytest.mark.parametrize('target_da', [DocumentArray.empty(100), random_docs(100)])
+def test_from_to_safe_list(target_da):
+    DocumentArray.from_list_safe(target_da.to_list_safe())
diff --git a/tests/unit/array/test_ravel_unravel.py b/tests/unit/array/test_ravel_unravel.py
@@ -32,7 +32,6 @@ def get_ndarrays_for_ravel():
 def test_ravel_embeddings_blobs(ndarray_val, attr, is_sparse):
     da = DocumentArray.empty(10)
     setattr(da, attr, ndarray_val)
-
     ndav = getattr(da, attr)
 
     # test read/getter
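(Rendered diff table; columns: original file line number | diff line change. The hunk below repeats the .github/requirements-cicd.txt change shown at the top of this diff.)
@@ -12,3 +12,4 @@ matplotlib
 rich
 Pillow
 lz4
+fastapi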