diff --git a/README.md b/README.md index aa84323..347d151 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ This package provides additional functionality for working with `missing` values - `Missings.replace` to wrap a collection in a (possibly indexable) iterator replacing `missing` with another value - `Missings.fail` to wrap a collection in a (possibly indexable) iterator throwing an error if `missing` is encountered - `skipmissings` to loop through a collection of iterators excluding indices where any iterators are `missing` +- `missingsmallest(f)` to create a partial order function that treats `missing` as the smallest value and otherwise behaves like `f` +- `missingsmallest`: the standard `isless` function modified to treat `missing` as the smallest value rather than the largest one ## Contributing and Questions diff --git a/src/Missings.jl b/src/Missings.jl index 08ed26c..e3119dd 100644 --- a/src/Missings.jl +++ b/src/Missings.jl @@ -2,7 +2,7 @@ module Missings export allowmissing, disallowmissing, ismissing, missing, missings, Missing, MissingException, levels, coalesce, passmissing, nonmissingtype, - skipmissings, emptymissing + skipmissings, emptymissing, missingsmallest using Base: ismissing, missing, Missing, MissingException @@ -514,4 +514,87 @@ julia> emptymissing(first)([1], 2) """ emptymissing(f) = (x, args...; kwargs...) -> isempty(x) ? missing : f(x, args...; kwargs...) -end # module +# Only for internal use. Allows dispatch over anonymous functions. +struct MissingSmallest{T} + lt::T +end + +""" + missingsmallest(f) + +Return a function of two arguments `x` and `y` that tests whether `x` is less +than `y` such that `missing` is always less than the other argument. In other +words, return a modified version of the partial order function `f` such that +`missing` is the smallest possible value, and all other non-`missing` values are +compared according to `f`. 
+ +The behavior of the standard `isless` function modified to treat `missing` as +the smallest value can be obtained by calling the 2-argument `missingsmallest(x, +y)` function. This is equivalent to `missingsmallest(isless)(x, y)`. + +# Examples +``` +julia> isshorter = missingsmallest((s1, s2) -> isless(length(s1), length(s2))); + +julia> isshorter("short", "longstring") +true + +julia> isshorter("longstring", "short") +false + +julia> isshorter("", missing) +false +``` +""" +missingsmallest(f) = MissingSmallest(f) + +""" + missingsmallest(x, y) + +The standard partial order `isless` modified so that `missing` is always the +smallest possible value: +- If neither argument is `missing`, the function behaves exactly as `isless`. +- If `y` is `missing` the result will be `false` regardless of the value of `x`. +- If `x` is `missing` the result will be `true` unless `y` is `missing`. + +See also the 1-argument method which takes a partial ordering function (like +`isless`) and modifies it to treat `missing` as explained above. These functions +can be used together with sorting functions so that missing values are sorted +first. This is useful in particular so that when sorting in reverse order +missing values appear at the end. + +# Examples +```jldoctest +julia> v = [1, missing, 2, missing, 10]; + +julia> sort(v, lt=missingsmallest) +5-element Vector{Union{Missing, Int64}}: + missing + missing + 1 + 2 + 10 + +julia> sort(v, lt=missingsmallest, rev=true) +5-element Vector{Union{Missing, Int64}}: + 10 + 2 + 1 + missing + missing + +julia> missingsmallest(missing, Inf) +true + +julia> missingsmallest(-Inf, missing) +false + +julia> missingsmallest(missing, missing) +false +``` +""" +missingsmallest(x, y) = missingsmallest(isless)(x, y) + +(ms::MissingSmallest)(x, y) = ismissing(y) ? false : ismissing(x) ? 
true : ms.lt(x, y) + +end # module \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 92e8a2c..c8c9963 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -158,7 +158,7 @@ struct CubeRooter end @test disallowmissing(Any[:a]) == [:a] @test disallowmissing(Any[:a]) isa AbstractVector{Any} @test_throws MethodError disallowmissing([1, missing]) - @test_throws MethodError disallowmissing([missing]) + @test_throws Union{MethodError, ArgumentError} disallowmissing([missing]) @test disallowmissing(Union{Int, Missing}[1 1]) == [1 1] @test disallowmissing(Union{Int, Missing}[1 1]) isa AbstractArray{Int, 2} @@ -167,7 +167,7 @@ struct CubeRooter end @test disallowmissing([:a 1]) == [:a 1] @test disallowmissing([:a 1]) isa AbstractArray{Any, 2} @test_throws MethodError disallowmissing([1 missing]) - @test_throws MethodError disallowmissing([missing missing]) + @test_throws Union{MethodError, ArgumentError} disallowmissing([missing missing]) # Lifting ## functor @@ -257,4 +257,30 @@ struct CubeRooter end @test emptymissing(fun)(3, 1, c=2) == (1, 2) end +@testset "missingsmallest" begin + @test missingsmallest(missing, Inf) == true + @test missingsmallest(-Inf, missing) == false + @test missingsmallest(missing, missing) == false + @test missingsmallest(3, 4) == true + @test missingsmallest(-Inf, Inf) == true + + @test missingsmallest("a", "b") == true + @test missingsmallest("short", missing) == false + @test missingsmallest(missing, "") == true + + @test missingsmallest((1, 2), (3, 4)) == true + @test missingsmallest((3, 4), (1, 2)) == false + @test missingsmallest(missing, (1e3, 1e4)) == true + + # Compare strings by length, not lexicographically + isshorter = missingsmallest((s1, s2) -> isless(length(s1), length(s2))) + @test isshorter("short", "longstring") == true + @test isshorter("longstring", "short") == false + @test isshorter(missing, "short") == true + @test isshorter("", missing) == false + + @test_throws MethodError 
missingsmallest(isless)(isless) + @test missingsmallest !== missingsmallest(isless) +end + end