Skip to content

Commit

Permalink
Merge branch 'main' into dependabot/github_actions/pypa/gh-action-pyp…
Browse files Browse the repository at this point in the history
…i-publish-1.8.6
  • Loading branch information
ant0nsc authored May 22, 2023
2 parents d27bcb1 + 26ff7d0 commit 8f577c9
Show file tree
Hide file tree
Showing 18 changed files with 759 additions and 449 deletions.
73 changes: 21 additions & 52 deletions hi-ml-azure/src/health_azure/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@
from azureml.dataprep.fuse.daemon import MountContext
from azureml.exceptions._azureml_exception import UserErrorException

from health_azure.utils import PathOrString, get_workspace, get_ml_client
from health_azure.utils import PathOrString, get_ml_client

logger = logging.getLogger(__name__)


V1OrV2DataType = Union[FileDataset, Data]
Expand Down Expand Up @@ -128,11 +130,14 @@ def _get_or_create_v1_dataset(datastore_name: str, dataset_name: str, workspace:
try:
azureml_dataset = _retrieve_v1_dataset(dataset_name, workspace)
except UserErrorException:
logger.warning(f"Dataset '{dataset_name}' was not found, or is not an AzureML SDK v1 dataset.")
logger.info(f"Trying to create a new dataset '{dataset_name}' from files in folder '{dataset_name}'")
if datastore_name == "":
raise ValueError(
"When creating a new dataset, a datastore name must be provided. Please specify a datastore name using "
"the --datastore flag"
)
logger.info(f"Trying to create a new dataset '{dataset_name}' in datastore '{datastore_name}'")
azureml_dataset = _create_v1_dataset(datastore_name, dataset_name, workspace)
return azureml_dataset

Expand Down Expand Up @@ -352,10 +357,8 @@ def __init__(

def to_input_dataset_local(
self,
strictly_aml_v1: bool,
workspace: Workspace = None,
ml_client: Optional[MLClient] = None,
) -> Tuple[Optional[Path], Optional[MountContext]]:
workspace: Workspace,
) -> Tuple[Path, Optional[MountContext]]:
"""
Return a local path to the dataset when outside of an AzureML run.
If local_folder is supplied, then this is assumed to be a local dataset, and this is returned.
Expand All @@ -364,9 +367,6 @@ def to_input_dataset_local(
therefore a tuple of Nones will be returned.
:param workspace: The AzureML workspace to read from.
:param strictly_aml_v1: If True, use Azure ML SDK v1 to attempt to find or create and register the dataset.
Otherwise, attempt to use Azure ML SDK v2.
:param ml_client: An Azure MLClient object for interacting with Azure resources.
:return: Tuple of (path to dataset, optional mountcontext)
"""
status = f"Dataset '{self.name}' will be "
Expand All @@ -381,12 +381,10 @@ def to_input_dataset_local(
f"Unable to make dataset '{self.name} available for a local run because no AzureML "
"workspace has been provided. Provide a workspace, or set a folder for local execution."
)
azureml_dataset = get_or_create_dataset(
azureml_dataset = _get_or_create_v1_dataset(
datastore_name=self.datastore,
dataset_name=self.name,
workspace=workspace,
strictly_aml_v1=strictly_aml_v1,
ml_client=ml_client,
)
if isinstance(azureml_dataset, FileDataset):
target_path = self.target_folder or Path(tempfile.mkdtemp())
Expand All @@ -404,7 +402,7 @@ def to_input_dataset_local(
print(status)
return result
else:
return None, None
raise ValueError(f"Don't know how to handle dataset '{self.name}' of type {type(azureml_dataset)}")

def to_input_dataset(
self,
Expand Down Expand Up @@ -556,60 +554,31 @@ def create_dataset_configs(
return datasets


def find_workspace_for_local_datasets(
    aml_workspace: Optional[Workspace], workspace_config_path: Optional[Path], dataset_configs: List[DatasetConfig]
) -> Optional[Workspace]:
    """
    Try to get an AzureML workspace, but only if at least one of the dataset_configs has no local folder set.

    :param aml_workspace: An existing AzureML Workspace, if one is already at hand. It is passed on to
        `get_workspace`.
    :param workspace_config_path: Path to the config.json file downloaded from the Azure portal, from which an
        existing Workspace can be retrieved. It is passed on to `get_workspace`.
    :param dataset_configs: List of DatasetConfig describing the input datasets.
    :return: The workspace if one was required and could be retrieved, None otherwise.
    """
    # A workspace is only needed when some dataset has no local folder.
    needs_workspace = any(config.local_folder is None for config in dataset_configs)
    if not needs_workspace:
        return None

    found: Optional[Workspace] = None
    try:
        found = get_workspace(aml_workspace, workspace_config_path)
        logging.info(f"Found workspace for datasets: {found.name}")
    except Exception as ex:
        # Best-effort lookup: a failure is logged, and whatever has been retrieved so far is returned.
        logging.info(f"Could not find workspace for datasets. Exception: {ex}")
    return found


def setup_local_datasets(
dataset_configs: List[DatasetConfig],
strictly_aml_v1: bool,
aml_workspace: Optional[Workspace] = None,
ml_client: Optional[MLClient] = None,
workspace_config_path: Optional[Path] = None,
) -> Tuple[List[Optional[Path]], List[MountContext]]:
workspace: Optional[Workspace],
) -> Tuple[List[Path], List[MountContext]]:
"""
When running outside of AzureML, setup datasets to be used locally.
For each DatasetConfig, if local_folder is supplied, then this is assumed to be a local dataset, and this is
used. Otherwise the dataset is mounted or downloaded to either the target folder or a temporary folder and that is
used.
:param aml_workspace: There are two optional parameters used to glean an existing AzureML Workspace. The simplest is
to pass it in as a parameter.
:param workspace_config_path: The 2nd option is to specify the path to the config.json file downloaded from the
Azure portal from which we can retrieve the existing Workspace.
If a dataset does not exist, an AzureML SDK v1 dataset will be created, assuming that the dataset is given
in a folder of the same name (for example, if a dataset is given as "mydataset", then it is created from the files
in folder "mydataset" in the datastore).
:param workspace: The AzureML workspace to work with. Can be None if the list of datasets is empty, or if
the datasets are available locally.
:param dataset_configs: List of DatasetConfig describing the input data assets.
:param strictly_aml_v1: If True, use Azure ML SDK v1. Otherwise, attempt to use Azure ML SDK v2.
:param ml_client: An MLClient object for interacting with AML v2 datastores.
:return: Pair of: list of optional paths to the input datasets, list of mountcontexts, one for each mounted dataset.
:return: Pair of: list of paths to the input datasets, list of mountcontexts, one for each mounted dataset.
"""
workspace = find_workspace_for_local_datasets(aml_workspace, workspace_config_path, dataset_configs)
mounted_input_datasets: List[Optional[Path]] = []
mounted_input_datasets: List[Path] = []
mount_contexts: List[MountContext] = []

for data_config in dataset_configs:
target_path, mount_context = data_config.to_input_dataset_local(strictly_aml_v1, workspace, ml_client)
target_path, mount_context = data_config.to_input_dataset_local(workspace)

mounted_input_datasets.append(target_path)

Expand Down
90 changes: 48 additions & 42 deletions hi-ml-azure/src/health_azure/himl.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,21 +442,20 @@ def effective_experiment_name(experiment_name: Optional[str], entry_script: Opti


def submit_run_v2(
workspace: Optional[Workspace],
ml_client: MLClient,
environment: EnvironmentV2,
entry_script: PathOrString,
script_params: List[str],
compute_target: str,
environment_variables: Optional[Dict[str, str]] = None,
experiment_name: Optional[str] = None,
input_datasets_v2: Optional[Dict[str, Input]] = None,
output_datasets_v2: Optional[Dict[str, Output]] = None,
snapshot_root_directory: Optional[Path] = None,
entry_script: Optional[PathOrString] = None,
script_params: Optional[List[str]] = None,
compute_target: Optional[str] = None,
tags: Optional[Dict[str, str]] = None,
docker_shm_size: str = "",
wait_for_completion: bool = False,
identity_based_auth: bool = False,
workspace_config_path: Optional[PathOrString] = None,
ml_client: Optional[MLClient] = None,
hyperparam_args: Optional[Dict[str, Any]] = None,
num_nodes: int = 1,
pytorch_processes_per_node: Optional[int] = None,
Expand All @@ -465,26 +464,23 @@ def submit_run_v2(
"""
Starts a v2 AML Job on a given workspace by submitting a command
:param workspace: The AzureML workspace to use.
:param ml_client: An Azure MLClient object for interacting with Azure resources.
:param environment: An AML v2 Environment object.
:param entry_script: The script that should be run in AzureML.
:param script_params: A list of parameters to pass on to the script as it runs in AzureML.
:param compute_target: The name of a compute target in Azure ML to submit the job to.
:param environment_variables: The environment variables that should be set when running in AzureML.
:param experiment_name: The name of the experiment that will be used or created. If the experiment name contains
characters that are not valid in Azure, those will be removed.
:param input_datasets_v2: An optional dictionary of Inputs to pass in to the command.
:param output_datasets_v2: An optional dictionary of Outputs to pass in to the command.
:param snapshot_root_directory: The directory that contains all code that should be packaged and sent to AzureML.
All Python code that the script uses must be copied over.
:param entry_script: The script that should be run in AzureML.
:param script_params: A list of parameters to pass on to the script as it runs in AzureML.
:param compute_target: Optional name of a compute target in Azure ML to submit the job to. If None, will run
locally.
:param tags: A dictionary of string key/value pairs, that will be added as metadata to the run. If set to None,
a default metadata field will be added that only contains the commandline arguments that started the run.
:param docker_shm_size: The Docker shared memory size that should be used when creating a new Docker image.
:param wait_for_completion: If False (the default) return after the run is submitted to AzureML, otherwise wait for
the completion of this run (if True).
:param workspace_config_path: If not provided with an AzureML Workspace, then load one given the information in this
config
:param ml_client: An Azure MLClient object for interacting with Azure resources.
:param hyperparam_args: A dictionary of hyperparameter search args to pass into a sweep job.
:param num_nodes: The number of nodes to use for the job in AzureML. The value must be 1 or greater.
:param pytorch_processes_per_node: For plain PyTorch multi-GPU processing: The number of processes per node.
Expand All @@ -494,20 +490,6 @@ def submit_run_v2(
display name will be generated by AzureML.
:return: An AzureML Run object.
"""
if ml_client is None:
if workspace is not None:
ml_client = get_ml_client(
subscription_id=workspace.subscription_id,
resource_group=workspace.resource_group,
workspace_name=workspace.name,
)
elif workspace_config_path is not None:
ml_client = get_ml_client(workspace_config_path=workspace_config_path)
else:
raise ValueError("Either workspace or workspace_config_path must be specified to connect to the Workspace")

assert compute_target is not None, "No compute_target has been provided"
assert entry_script is not None, "No entry_script has been provided"
snapshot_root_directory = snapshot_root_directory or Path.cwd()
root_dir = Path(snapshot_root_directory)

Expand Down Expand Up @@ -547,6 +529,7 @@ def create_command_job(cmd: str) -> Command:
inputs=input_datasets_v2,
outputs=output_datasets_v2,
environment=environment.name + "@latest",
environment_variables=environment_variables,
compute=compute_target,
experiment_name=experiment_name,
tags=tags or {},
Expand Down Expand Up @@ -589,7 +572,11 @@ def create_command_job(cmd: str) -> Command:
job_to_submit = create_command_job(cmd)

returned_job = ml_client.jobs.create_or_update(job_to_submit)
print(f"URL to job: {returned_job.services['Studio'].endpoint}") # type: ignore
print("\n==============================================================================")
# The ID field looks like /subscriptions/<sub>/resourceGroups/<rg>/providers/Microsoft.MachineLearningServices/..
print(f"Successfully queued run {(returned_job.id or '').split('/')[-1]}")
print(f"Run URL: {returned_job.services['Studio'].endpoint}") # type: ignore
print("==============================================================================\n")
if wait_for_completion:
print("Waiting for the completion of the AzureML job.")
wait_for_job_completion(ml_client, job_name=returned_job.name)
Expand Down Expand Up @@ -668,7 +655,7 @@ def submit_run(

# These need to be 'print' not 'logging.info' so that the calling script sees them outside AzureML
print("\n==============================================================================")
print(f"Successfully queued run number {run.number} (ID {run.id}) in experiment {run.experiment.name}")
print(f"Successfully queued run {run.id} in experiment {run.experiment.name}")
print(f"Experiment name and run ID are available in file {RUN_RECOVERY_FILE}")
print(f"Experiment URL: {run.experiment.get_portal_url()}")
print(f"Run URL: {run.get_portal_url()}")
Expand Down Expand Up @@ -882,6 +869,18 @@ def submit_to_azure_if_needed( # type: ignore
# is necessary. If not, return to the caller for local execution.
if submit_to_azureml is None:
submit_to_azureml = AZUREML_FLAG in sys.argv[1:]

has_input_datasets = len(cleaned_input_datasets) > 0
if submit_to_azureml or has_input_datasets:
if strictly_aml_v1:
aml_workspace = get_workspace(aml_workspace, workspace_config_path)
assert aml_workspace is not None
print(f"Loaded AzureML workspace {aml_workspace.name}")
else:
ml_client = get_ml_client(ml_client=ml_client, workspace_config_path=workspace_config_path)
assert ml_client is not None
print(f"Created MLClient for AzureML workspace {ml_client.workspace_name}")

if not submit_to_azureml:
# Set the environment variables for local execution.
environment_variables = {**DEFAULT_ENVIRONMENT_VARIABLES, **(environment_variables or {})}
Expand All @@ -895,16 +894,24 @@ def submit_to_azure_if_needed( # type: ignore
logs_folder = Path.cwd() / LOGS_FOLDER
logs_folder.mkdir(exist_ok=True)

any_local_folders_missing = any(dataset.local_folder is None for dataset in cleaned_input_datasets)

if has_input_datasets and any_local_folders_missing and not strictly_aml_v1:
raise ValueError(
"AzureML SDK v2 does not support downloading datasets from AzureML for local execution. "
"Please switch to AzureML SDK v1 by setting strictly_aml_v1=True, or use "
"--strictly_aml_v1 on the commandline, or provide a local folder for each input dataset. "
"Note that you will not be able use AzureML datasets for runs outside AzureML if the datasets were "
"created via SDK v2."
)

mounted_input_datasets, mount_contexts = setup_local_datasets(
cleaned_input_datasets,
strictly_aml_v1,
aml_workspace=aml_workspace,
ml_client=ml_client,
workspace_config_path=workspace_config_path,
workspace=aml_workspace,
)

return AzureRunInfo(
input_datasets=mounted_input_datasets,
input_datasets=mounted_input_datasets, # type: ignore
output_datasets=[d.local_folder for d in cleaned_output_datasets],
mount_contexts=mount_contexts,
run=None,
Expand All @@ -917,9 +924,6 @@ def submit_to_azure_if_needed( # type: ignore
print(f"No snapshot root directory given. Uploading all files in the current directory {Path.cwd()}")
snapshot_root_directory = Path.cwd()

workspace = get_workspace(aml_workspace, workspace_config_path)
print(f"Loaded AzureML workspace {workspace.name}")

if conda_environment_file is None:
conda_environment_file = find_file_in_parent_to_pythonpath(CONDA_ENVIRONMENT_FILE)
if conda_environment_file is None:
Expand All @@ -935,8 +939,9 @@ def submit_to_azure_if_needed( # type: ignore

with append_to_amlignore(amlignore=amlignore_path, lines_to_append=lines_to_append):
if strictly_aml_v1:
assert aml_workspace is not None, "An AzureML workspace should have been created already."
run_config = create_run_configuration(
workspace=workspace,
workspace=aml_workspace,
compute_cluster_name=compute_cluster_name,
aml_environment_name=aml_environment_name,
conda_environment_file=conda_environment_file,
Expand Down Expand Up @@ -965,7 +970,7 @@ def submit_to_azure_if_needed( # type: ignore
config_to_submit = script_run_config

run = submit_run(
workspace=workspace,
workspace=aml_workspace,
experiment_name=effective_experiment_name(experiment_name, script_run_config.script),
script_run_config=config_to_submit,
tags=tags,
Expand All @@ -976,6 +981,7 @@ def submit_to_azure_if_needed( # type: ignore
if after_submission is not None:
after_submission(run) # type: ignore
else:
assert ml_client is not None, "An AzureML MLClient should have been created already."
if conda_environment_file is None:
raise ValueError("Argument 'conda_environment_file' must be specified when using AzureML v2")
environment = create_python_environment_v2(
Expand All @@ -984,17 +990,17 @@ def submit_to_azure_if_needed( # type: ignore
if entry_script is None:
entry_script = Path(sys.argv[0])

ml_client = get_ml_client(ml_client=ml_client, aml_workspace=workspace)
registered_env = register_environment_v2(environment, ml_client)
input_datasets_v2 = create_v2_inputs(ml_client, cleaned_input_datasets)
output_datasets_v2 = create_v2_outputs(ml_client, cleaned_output_datasets)

job = submit_run_v2(
workspace=workspace,
ml_client=ml_client,
input_datasets_v2=input_datasets_v2,
output_datasets_v2=output_datasets_v2,
experiment_name=experiment_name,
environment=registered_env,
environment_variables=environment_variables,
snapshot_root_directory=snapshot_root_directory,
entry_script=entry_script,
script_params=script_params,
Expand Down
7 changes: 1 addition & 6 deletions hi-ml-azure/src/health_azure/himl_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,7 @@ def main() -> None: # pragma: no cover

files_to_download = download_config.files_to_download

workspace = get_workspace()
ml_client = get_ml_client(
subscription_id=workspace.subscription_id,
resource_group=workspace.resource_group,
workspace_name=workspace.name,
)
ml_client = get_ml_client()
for run_id in download_config.run:
download_job_outputs_logs(ml_client, run_id, file_to_download_path=files_to_download, download_dir=output_dir)
print("Successfully downloaded output and log files")
Expand Down
1 change: 0 additions & 1 deletion hi-ml-azure/src/health_azure/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from health_azure.utils import ENV_LOCAL_RANK, check_is_any_of, is_global_rank_zero

logging_stdout_handler: Optional[logging.StreamHandler] = None
logging_to_file_handler: Optional[logging.StreamHandler] = None


def logging_to_stdout(log_level: Union[int, str] = logging.INFO) -> None:
Expand Down
Loading

0 comments on commit 8f577c9

Please sign in to comment.