Removed overwrite and public arguments from hub.load (#1275)

* Removed the `overwrite` and `public` arguments from `hub.load`
* Updated the tests to match the API change
activeloopai · Oct 26, 2021 · c1f908e
1 parent eb53b89
commit c1f908e
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 13 deletions.
diff --git a/hub/api/dataset.py b/hub/api/dataset.py
@@ -148,8 +148,6 @@ def empty(
     def load(
         path: str,
         read_only: bool = False,
-        overwrite: bool = False,
-        public: Optional[bool] = True,
         memory_cache_size: int = DEFAULT_MEMORY_CACHE_SIZE,
         local_cache_size: int = DEFAULT_LOCAL_CACHE_SIZE,
         creds: Optional[dict] = None,
@@ -158,9 +156,6 @@ def load(
     ) -> Dataset:
         """Loads an existing dataset
 
-        Important:
-            Using `overwrite` will delete all of your data if it exists! Be very careful when setting this parameter.
-
         Args:
             path (str): The full path to the dataset. Can be:-
                 - a Hub cloud path of the form hub://username/datasetname. To write to Hub cloud datasets, ensure that you are logged in to Hub (use 'activeloop login' from command line)
@@ -169,8 +164,6 @@ def load(
                 - a memory path of the form mem://path/to/dataset which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
             read_only (bool): Opens dataset in read only mode if this is passed as True. Defaults to False.
                 Datasets stored on Hub cloud that your account does not have write access to will automatically open in read mode.
-            overwrite (bool): WARNING: If set to True this overwrites the dataset if it already exists. This can NOT be undone! Defaults to False.
-            public (bool, optional): Defines if the dataset will have public access. Applicable only if Hub cloud storage is used and a new Dataset is being created. Defaults to True.
             memory_cache_size (int): The size of the memory cache to be used in MB.
             local_cache_size (int): The size of the local filesystem cache to be used in MB.
             creds (dict, optional): A dictionary containing credentials used to access the dataset at the path.
@@ -188,7 +181,7 @@ def load(
         if creds is None:
             creds = {}
 
-        feature_report_path(path, "load", {"Overwrite": overwrite})
+        feature_report_path(path, "load", {})
 
         storage, cache_chain = get_storage_and_cache_chain(
             path=path,
@@ -203,12 +196,10 @@ def load(
             raise DatasetHandlerError(
                 f"A Hub dataset does not exist at the given path ({path}). Check the path provided or in case you want to create a new dataset, use hub.empty()."
             )
-        if overwrite:
-            storage.clear()
 
         read_only = storage.read_only
         return get_dataset_instance(
-            path, storage=cache_chain, read_only=read_only, public=public, token=token
+            path, storage=cache_chain, read_only=read_only, token=token
         )
 
     @staticmethod

diff --git a/hub/api/tests/test_dataset.py b/hub/api/tests/test_dataset.py
@@ -41,7 +41,7 @@ def test_dataset_empty_load():
         with pytest.raises(DatasetHandlerError):
             ds_random = hub.load("some_random_path")
 
-        ds_overwrite_load = hub.load(path, overwrite=True)
+        ds_overwrite_load = hub.dataset(path, overwrite=True)
         assert len(ds_overwrite_load) == 0
         assert len(ds_overwrite_load.tensors) == 0
         with ds_overwrite_load:
@@ -54,7 +54,7 @@ def test_dataset_empty_load():
         with pytest.raises(DatasetHandlerError):
             ds_empty = hub.empty(path)
 
-        ds_overwrite_empty = hub.load(path, overwrite=True)
+        ds_overwrite_empty = hub.dataset(path, overwrite=True)
         assert len(ds_overwrite_empty) == 0
         assert len(ds_overwrite_empty.tensors) == 0