From 8952b511f9625a37f03c16f4d59450ec01eae48e Mon Sep 17 00:00:00 2001 From: Marco Vela <36453977+characat0@users.noreply.github.com> Date: Fri, 27 Sep 2024 19:26:34 -0500 Subject: [PATCH 1/4] Update loggers.jl --- src/loggers.jl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/loggers.jl b/src/loggers.jl index 183befd..9722370 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -86,8 +86,15 @@ path of the artifact that was created. function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::AbstractString, data) mlflowrun = getrun(mlf, run_id) artifact_uri = mlflowrun.info.artifact_uri - mkpath(artifact_uri) filepath = joinpath(artifact_uri, basefilename) + scheme = URI(artifact_uri).scheme + if scheme == "mlflow-artifacts" + u = URI("$(mlf.baseuri)/api/$(mlf.apiversion)/mlflow-artifacts/artifacts/$(basefilename)") + apiheaders = headers(mlf, Dict("Content-Type"=>"application/octet-stream")) + HTTP.put(u, apiheaders, data) + return filepath + end + mkpath(artifact_uri) try f = open(filepath, "w") write(f, data) From e2c15d96b01402325c6d7dec6656beb46325785c Mon Sep 17 00:00:00 2001 From: characat0 <36453977+characat0@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:24:18 -0500 Subject: [PATCH 2/4] refactor tests --- test/test_experiments.jl | 20 +++++++++++--------- test/test_functional.jl | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/test/test_experiments.jl b/test/test_experiments.jl index cc697f4..3ffb0ca 100644 --- a/test/test_experiments.jl +++ b/test/test_experiments.jl @@ -47,38 +47,40 @@ end @testset "deleteexperiment" begin @ensuremlf exp = createexperiment(mlf) + experiments_before = searchexperiments(mlf) deleteexperiment(mlf, exp) - experiments = searchexperiments(mlf) - @test length(experiments) == 1 # 1 for the default experiment + experiments_after = searchexperiments(mlf) + @test length(experiments_after) == length(experiments_before) - 1 # exactly one experiment was deleted end @testset 
"restoreexperiment" begin @ensuremlf exp = createexperiment(mlf) + experiments_before = searchexperiments(mlf) deleteexperiment(mlf, exp) - experiments = searchexperiments(mlf) - @test length(experiments) == 1 # 1 for the default experiment + experiments_after = searchexperiments(mlf) + @test length(experiments_after) == length(experiments_before) - 1 # exactly one experiment was deleted restoreexperiment(mlf, exp) - experiments = searchexperiments(mlf) - @test length(experiments) == 2 # the restored experiment and the default one + experiments_after_2 = searchexperiments(mlf) + @test length(experiments_after_2) == length(experiments_after) + 1 # the restored experiment is visible again deleteexperiment(mlf, exp) end @testset verbose = true "searchexperiments" begin @ensuremlf - n_experiments = 3 - for i in 2:n_experiments + n_experiments_before = length(searchexperiments(mlf)) + for i in 1:2 createexperiment(mlf) end createexperiment(mlf; name="test") experiments = searchexperiments(mlf) @testset "searchexperiments_get_all" begin - @test length(experiments) == (n_experiments + 1) # Adding one for the default experiment + @test length(experiments) == (n_experiments_before + 3) # three experiments were created above end @testset "searchexperiments_by_filter" begin diff --git a/test/test_functional.jl b/test/test_functional.jl index 400ba06..ef79659 100644 --- a/test/test_functional.jl +++ b/test/test_functional.jl @@ -61,7 +61,7 @@ end # this is what happens in github actions - mlflow runs in a container, the artifact_uri is not immediately available, and tests are skipped artifact_uri = exprun.info.artifact_uri if isdir(artifact_uri) - @test_throws SystemError logartifact(mlf, exprun, "/etc/shadow") + @test_throws ErrorException logartifact(mlf, exprun, "/etc/shadow") tmpfiletoupload = "sometempfilename.txt" f = open(tmpfiletoupload, "w") From 373b74ebbfc2d00a9a9e375c8a9174070ac43926 Mon Sep 17 00:00:00 2001 From: characat0 
<36453977+characat0@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:49:09 -0500 Subject: [PATCH 3/4] allow upload to artifact store --- Project.toml | 2 +- src/MLFlowClient.jl | 1 + src/api.jl | 24 +++++++++++++++++++++++- src/loggers.jl | 7 ++----- src/utils.jl | 11 +++++++++++ test/test_functional.jl | 10 ++++++++++ 6 files changed, 48 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 59d3b69..d1a42f2 100644 --- a/Project.toml +++ b/Project.toml @@ -6,6 +6,7 @@ version = "0.5.1" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" +FileTypes = "b58e86d0-4a47-4fce-a54d-8006a143ed90" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" ShowCases = "605ecd9f-84a6-4c9e-81e2-4798472b76a3" @@ -25,4 +26,3 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] test = ["Test"] - diff --git a/src/MLFlowClient.jl b/src/MLFlowClient.jl index b9a179a..47b811b 100644 --- a/src/MLFlowClient.jl +++ b/src/MLFlowClient.jl @@ -19,6 +19,7 @@ using URIs using JSON using ShowCases using FilePathsBase: AbstractPath +using FileTypes include("types/mlflow.jl") export diff --git a/src/api.jl b/src/api.jl index fb2edf8..7475592 100644 --- a/src/api.jl +++ b/src/api.jl @@ -31,4 +31,26 @@ function mlfpost(mlf, endpoint; kwargs...) catch e throw(e) end -end \ No newline at end of file +end + +""" + mlfput_artifact(mlf, artifact_uri, filename, data) + +Performs a HTTP PUT to upload the specified artifact. +Assumes that the artifact store is hosted in the mlflow server. 
+""" +function mlfput_artifact(mlf, artifact_uri, filename, data) + artifact_path = chopprefix(artifact_uri, "mlflow-artifacts:/") + if artifact_path == artifact_uri + error("Artifact URI must start with `mlflow-artifacts:/`") + end + content_type = guess_mime(filename).mime + apiuri = URI("$(mlf.apiroot)/$(mlf.apiversion)/mlflow-artifacts/artifacts/$(artifact_path)/$(filename)") + apiheaders = headers(mlf, Dict("Content-Type" => content_type)) + try + response = HTTP.put(apiuri, apiheaders, data) + return JSON.parse(String(response.body)) + catch e + throw(e) + end +end diff --git a/src/loggers.jl b/src/loggers.jl index 9722370..61b4765 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -87,11 +87,8 @@ function logartifact(mlf::MLFlow, run_id::AbstractString, basefilename::Abstract mlflowrun = getrun(mlf, run_id) artifact_uri = mlflowrun.info.artifact_uri filepath = joinpath(artifact_uri, basefilename) - scheme = URI(artifact_uri).scheme - if scheme == "mlflow-artifacts" - u = URI("$(mlf.baseuri)/api/$(mlf.apiversion)/mlflow-artifacts/artifacts/$(basefilename)") - apiheaders = headers(mlf, Dict("Content-Type"=>"application/octet-stream")) - HTTP.put(u, apiheaders, data) + if startswith(artifact_uri, "mlflow-artifacts:/") + mlfput_artifact(mlf, artifact_uri, basefilename, data) return filepath end mkpath(artifact_uri) diff --git a/src/utils.jl b/src/utils.jl index bc8e225..1f15195 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -51,6 +51,17 @@ function generatefilterfromentity_type(filter_params::AbstractDict{K,V}, entity_ join(filters, " and ") end +function guess_mime(data)::FileTypes.FileType.Type + for i in FileTypes.Types + for (key, value) in i + if value(data) + return key + end + end + end + FileTypes.FileType.Type("", MIME("application/octet-stream")) +end + """ generatefilterfromparams(filter_params::AbstractDict{K,V}) where {K,V} diff --git a/test/test_functional.jl b/test/test_functional.jl index ef79659..26b883f 100644 --- a/test/test_functional.jl 
+++ b/test/test_functional.jl @@ -60,6 +60,16 @@ end # when running mlflow in a container, the below tests will be skipped # this is what happens in github actions - mlflow runs in a container, the artifact_uri is not immediately available, and tests are skipped artifact_uri = exprun.info.artifact_uri + if startswith(artifact_uri, "mlflow-artifacts:/") + tmp = tempdir() + mkpath(tmp) + tmpfiletoupload = "$(tmp)/sometempfilename.txt" + f = open(tmpfiletoupload, "w") + write(f, "samplecontents") + close(f) + logartifact(mlf, exprun, tmpfiletoupload) + end + if isdir(artifact_uri) @test_throws ErrorException logartifact(mlf, exprun, "/etc/shadow") From eca88ef0d84787161949cf1d2614260fadae747b Mon Sep 17 00:00:00 2001 From: characat0 <36453977+characat0@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:51:52 -0500 Subject: [PATCH 4/4] update docstring --- src/loggers.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/loggers.jl b/src/loggers.jl index 61b4765..ee3b4c8 100644 --- a/src/loggers.jl +++ b/src/loggers.jl @@ -68,8 +68,7 @@ end Stores an artifact (file) in the run's artifact location. !!! note - Assumes that artifact_uri is mapped to a local directory. - At the moment, this only works if both MLFlow and the client are running on the same host or they map a directory that leads to the same location over NFS, for example. + Only supports local and proxied artifact stores. # Arguments - `mlf::MLFlow`: [`MLFlow`](@ref) onfiguration. Currently not used, but when this method is extended to support `S3`, information from `mlf` will be needed.