Skip to content

Commit

Permalink
Removed overwrite and public arguments from hub.load (#1275)
Browse files: browse the repository at this point in the history
* Removed the `overwrite` and `public` arguments from `hub.load`

* Updated the tests to match the API change
  • Loading branch information
AbhinavTuli authored Oct 26, 2021
1 parent eb53b89 commit c1f908e
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 13 deletions.
13 changes: 2 additions & 11 deletions hub/api/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,6 @@ def empty(
def load(
path: str,
read_only: bool = False,
overwrite: bool = False,
public: Optional[bool] = True,
memory_cache_size: int = DEFAULT_MEMORY_CACHE_SIZE,
local_cache_size: int = DEFAULT_LOCAL_CACHE_SIZE,
creds: Optional[dict] = None,
Expand All @@ -158,9 +156,6 @@ def load(
) -> Dataset:
"""Loads an existing dataset
Important:
Using `overwrite` will delete all of your data if it exists! Be very careful when setting this parameter.
Args:
path (str): The full path to the dataset. Can be:-
- a Hub cloud path of the form hub://username/datasetname. To write to Hub cloud datasets, ensure that you are logged in to Hub (use 'activeloop login' from command line)
Expand All @@ -169,8 +164,6 @@ def load(
- a memory path of the form mem://path/to/dataset which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
read_only (bool): Opens dataset in read only mode if this is passed as True. Defaults to False.
Datasets stored on Hub cloud that your account does not have write access to will automatically open in read mode.
overwrite (bool): WARNING: If set to True this overwrites the dataset if it already exists. This can NOT be undone! Defaults to False.
public (bool, optional): Defines if the dataset will have public access. Applicable only if Hub cloud storage is used and a new Dataset is being created. Defaults to True.
memory_cache_size (int): The size of the memory cache to be used in MB.
local_cache_size (int): The size of the local filesystem cache to be used in MB.
creds (dict, optional): A dictionary containing credentials used to access the dataset at the path.
Expand All @@ -188,7 +181,7 @@ def load(
if creds is None:
creds = {}

feature_report_path(path, "load", {"Overwrite": overwrite})
feature_report_path(path, "load", {})

storage, cache_chain = get_storage_and_cache_chain(
path=path,
Expand All @@ -203,12 +196,10 @@ def load(
raise DatasetHandlerError(
f"A Hub dataset does not exist at the given path ({path}). Check the path provided or in case you want to create a new dataset, use hub.empty()."
)
if overwrite:
storage.clear()

read_only = storage.read_only
return get_dataset_instance(
path, storage=cache_chain, read_only=read_only, public=public, token=token
path, storage=cache_chain, read_only=read_only, token=token
)

@staticmethod
Expand Down
4 changes: 2 additions & 2 deletions hub/api/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_dataset_empty_load():
with pytest.raises(DatasetHandlerError):
ds_random = hub.load("some_random_path")

ds_overwrite_load = hub.load(path, overwrite=True)
ds_overwrite_load = hub.dataset(path, overwrite=True)
assert len(ds_overwrite_load) == 0
assert len(ds_overwrite_load.tensors) == 0
with ds_overwrite_load:
Expand All @@ -54,7 +54,7 @@ def test_dataset_empty_load():
with pytest.raises(DatasetHandlerError):
ds_empty = hub.empty(path)

ds_overwrite_empty = hub.load(path, overwrite=True)
ds_overwrite_empty = hub.dataset(path, overwrite=True)
assert len(ds_overwrite_empty) == 0
assert len(ds_overwrite_empty.tensors) == 0

Expand Down

0 comments on commit c1f908e

Please sign in to comment.