From d62567dc0c23d5c147f27f9931bb54ddfd2b85b7 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 16:26:46 +0800 Subject: [PATCH 1/7] add probs for Bernoulli and Binomial --- src/testutils.jl | 11 +++++++++++ src/univariate/bernoulli.jl | 20 ++++++++++++++++++++ src/univariate/binomial.jl | 25 +++++++++++++++++++++++++ src/univariate/categorical.jl | 1 + test/discrete_ref.csv | 2 ++ test/discrete_ref.txt | 2 ++ 6 files changed, 61 insertions(+) diff --git a/src/testutils.jl b/src/testutils.jl index 5e3bfbab1..109939248 100644 --- a/src/testutils.jl +++ b/src/testutils.jl @@ -81,6 +81,17 @@ function test_samples(s::Sampleable{Univariate, Discrete}, # the sampleable p0 = pdf(distr, rmin:rmax) # reference probability masses @assert length(p0) == m + # check the consistency between probs and pdf + if isa(s, Distribution) + if applicable(probs, s, rmin:rmax) + @test_approx_eq probs(s, rmin:rmax) p0 + end + if applicable(probs, s) + @assert isfinite(vmin) && isfinite(vmax) + @test_approx_eq probs(s) probs(s, vmin:vmax) + end + end + # determine confidence intervals for counts: # with probability q, the count will be out of this interval. # diff --git a/src/univariate/bernoulli.jl b/src/univariate/bernoulli.jl index 9d875e355..bb70d9032 100644 --- a/src/univariate/bernoulli.jl +++ b/src/univariate/bernoulli.jl @@ -18,6 +18,26 @@ end Bernoulli() = Bernoulli(0.5) +probs(d::Bernoulli) = [d.p0, d.p1] + +function probs(d::Bernoulli, rgn::UnitRange) + n = length(rgn) + if n == 0 + return Float64[] + elseif n == 1 + f = rgn[1] + return f == zero(f) ? [d.p0] : + f == one(f) ? [d.p1] : + throw(BoundsError()) + elseif n == 2 + f = rgn[1] + return f == zero(f) ? [d.p0, d.p1] : + throw(BoundsError()) + else + throw(BoundsError()) + end +end + cdf(d::Bernoulli, q::Real) = q >= zero(q) ? (q >= one(q) ? 1.0 : d.p0) : 0. function entropy(d::Bernoulli) diff --git a/src/univariate/binomial.jl b/src/univariate/binomial.jl index e4b0bb975..78ab7d6ab 100644 --- a/src/univariate/binomial.jl +++ b/src/univariate/binomial.jl @@ -16,6 +16,31 @@ maximum(d::Binomial) = d.size @_jl_dist_2p Binomial binom +function _probs(d::Binomial, f::Int, l::Int) + n = d.size + p = d.prob + b = f - 1 + r = Array(Float64, l - b) + r[1] = v = pdf(d, f) + if l > f + c = p / (1.0 - p) + for k = f+1:l + v *= ((n - k + 1) / k * c) + r[k-b] = v + end + end + return r +end + +probs(d::Binomial) = _probs(d, 0, d.size) + +function probs(d::Binomial, rgn::UnitRange) + f, l = rgn[1], rgn[end] + 0 <= f <= l <= d.size || throw(BoundsError()) + _probs(d, f, l) +end + + function entropy(d::Binomial; approx::Bool=false) n = d.size p1 = d.prob diff --git a/src/univariate/categorical.jl b/src/univariate/categorical.jl index 211f1a154..8eed2c992 100644 --- a/src/univariate/categorical.jl +++ b/src/univariate/categorical.jl @@ -14,6 +14,7 @@ end ncategories(d::Categorical) = d.K probs(d::Categorical) = d.prob +probs(d::Categorical, rgn::UnitRange) = d.prob[rgn] ### handling support diff --git a/test/discrete_ref.csv b/test/discrete_ref.csv index 7ee061325..967fabd06 100644 --- a/test/discrete_ref.csv +++ b/test/discrete_ref.csv @@ -2,6 +2,8 @@ "Bernoulli(0.9)", 9.0000000000000002e-01, 8.9999999999999983e-02, 3.2508297339144820e-01, 1, 1, 1, -1.0536051565782628e-01, -1.0536051565782628e-01, -1.0536051565782628e-01 "Bernoulli(0.1)", 1.0000000000000001e-01, 9.0000000000000011e-02, 3.2508297339144820e-01, 0, 0, 0, -1.0536051565782631e-01, -1.0536051565782631e-01, -1.0536051565782631e-01 "Binomial(1, 0.5)", 5.0000000000000000e-01, 2.5000000000000000e-01, 6.9314718055994529e-01, 0, 0, 1, -6.9314718055994529e-01, -6.9314718055994529e-01, -6.9314718055994529e-01 +"Binomial(5, 0.4)", 2.0000000000000000e+00, 1.2000000000000000e+00, 1.4979981829038540e+00, 1, 2, 3, -1.3501553145040179e+00, -1.0624732420522367e+00, -1.4679383501604009e+00 +"Binomial(6, 0.8)", 4.8000000000000007e+00, 9.5999999999999996e-01, 1.3425774508710622e+00, 4, 5, 6, -1.4033998290228298e+00, -9.3339619977709365e-01, -1.3388613078852583e+00 "Binomial(100, 0.1)", 1.0000000000000000e+01, 9.0000000000000000e+00, 2.5112331161825550e+00, 8, 10, 12, -2.1643632354294020e+00, -2.0259739768661866e+00, -2.3147790140625677e+00 "Binomial(100, 0.9)", 9.0000000000000000e+01, 8.9999999999999982e+00, 2.5112331161825514e+00, 88, 90, 92, -2.3147790140625695e+00, -2.0259739768661902e+00, -2.1643632354294020e+00 "DiscreteUniform(0, 4)", 2.0000000000000000e+00, 2.0000000000000000e+00, 1.6094379124341003e+00, 1, 2, 3, -1.6094379124341003e+00, -1.6094379124341003e+00, -1.6094379124341003e+00 diff --git a/test/discrete_ref.txt b/test/discrete_ref.txt index 4b9deb896..d28d07683 100644 --- a/test/discrete_ref.txt +++ b/test/discrete_ref.txt @@ -2,6 +2,8 @@ Bernoulli(0.5) Bernoulli(0.9) Bernoulli(0.1) Binomial(1, 0.5) +Binomial(5, 0.4) +Binomial(6, 0.8) Binomial(100, 0.1) Binomial(100, 0.9) DiscreteUniform(0, 4) From 5d78b973737ebe426ef3015be8b5e83eda96a553 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 16:31:45 +0800 Subject: [PATCH 2/7] add probs for DiscreteUniform --- src/univariate/discreteuniform.jl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/univariate/discreteuniform.jl b/src/univariate/discreteuniform.jl index fa355d450..41079c416 100644 --- a/src/univariate/discreteuniform.jl +++ b/src/univariate/discreteuniform.jl @@ -11,6 +11,17 @@ end DiscreteUniform(b::Integer) = DiscreteUniform(0, b) DiscreteUniform() = DiscreteUniform(0, 1) +function probs(d::DiscreteUniform) + n = d.b - d.a + 1 + fill(1.0 / n, n) +end + +function probs(d::DiscreteUniform, rgn::UnitRange) + f, l = rgn[1], rgn[end] + d.a <= f <= l <= d.b || throw(BoundsError()) + fill(1.0 / (d.b - d.a + 1), l - f + 1) +end + function cdf(d::DiscreteUniform, k::Real) k < d.a ? 0. : (k > d.b ? 1. : (ifloor(k) - d.a + 1.0) / (d.b - d.a + 1.0)) end From 901e39a945335f16ea02dded2556dd3dfbc75651 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 16:40:05 +0800 Subject: [PATCH 3/7] add probs for Geometric --- src/univariate/geometric.jl | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/univariate/geometric.jl b/src/univariate/geometric.jl index f7f649b50..08b4fc11e 100644 --- a/src/univariate/geometric.jl +++ b/src/univariate/geometric.jl @@ -8,6 +8,20 @@ end Geometric() = Geometric(0.5) # Flips of a fair coin +function probs(d::Geometric, rgn::UnitRange) + f, l = rgn[1], rgn[end] + 0 <= f <= l || throw(BoundsError()) + p = d.prob + r = Array(Float64, l - f + 1) + pfail = 1.0 - p + r[1] = v = (pfail^f) * p + b = f - 1 + for i = f+1:l + r[i - b] = (v *= pfail) + end + return r +end + ## Support insupport(::Geometric, x::Real) = isinteger(x) && x >= zero(x) insupport(::Type{Geometric}, x::Real) = isinteger(x) && x >= zero(x) From 6c209dcb361980219a8b3c788866f629bde3e51d Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 16:51:28 +0800 Subject: [PATCH 4/7] add probs to Hypergeometric --- src/univariate/hypergeometric.jl | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/univariate/hypergeometric.jl b/src/univariate/hypergeometric.jl index ff0427e95..77854dea4 100644 --- a/src/univariate/hypergeometric.jl +++ b/src/univariate/hypergeometric.jl @@ -26,6 +26,31 @@ support(d::Hypergeometric) = minimum(d):maximum(d) islowerbounded(d::Hypergeometric) = true isupperbounded(d::Hypergeometric) = true +function _probs(d::Hypergeometric, f::Int, l::Int) + ns = d.ns + nf = d.nf + n = d.n + r = Array(Float64, l - f + 1) + r[1] = v = pdf(d, f) + b = f - 1 + if l > f + for x = f+1:l + c = ((ns - x + 1) / x) * ((n - x + 1) / (nf - n + x)) + r[x-b] = (v *= c) + end + end + return r +end + +probs(d::Hypergeometric) = _probs(d, minimum(d), maximum(d)) + +function probs(d::Hypergeometric, rgn::UnitRange) + f, l = rgn[1], rgn[end] + minimum(d) <= f <= l <= maximum(d) || throw(BoundsError()) + _probs(d, f, l) +end + + # properties mean(d::Hypergeometric) = d.n * d.ns / (d.ns + d.nf) From b9d6e24e777bd21e9ce8d9ec00479ffb2bbe6035 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 17:07:41 +0800 Subject: [PATCH 5/7] add probs to NegativeBinomial --- src/univariate/negativebinomial.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/univariate/negativebinomial.jl b/src/univariate/negativebinomial.jl index f4916da78..65a98893f 100644 --- a/src/univariate/negativebinomial.jl +++ b/src/univariate/negativebinomial.jl @@ -29,6 +29,21 @@ maximum(::Union(NegativeBinomial, Type{NegativeBinomial})) = Inf insupport(::NegativeBinomial, x::Real) = isinteger(x) && zero(x) <= x insupport(::Type{NegativeBinomial}, x::Real) = isinteger(x) && zero(x) <= x +function probs(d::NegativeBinomial, rgn::UnitRange) + r = d.r + p0 = 1.0 - d.prob + f, l = rgn[1], rgn[end] + 0 <= f <= l || throw(BoundsError()) + res = Array(Float64, l - f + 1) + res[1] = v = pdf(d, f) + b = f - 1 + for x = f+1:l + c = (x + r - 1) * p0 / x + res[x-b] = (v *= c) + end + return res +end + function mgf(d::NegativeBinomial, t::Real) r, p = d.r, d.prob return ((1.0 - p) * exp(t))^r / (1.0 - p * exp(t))^r From 2afa38d27ddc82ae997a87150c06cf85523cf3a8 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 17:14:02 +0800 Subject: [PATCH 6/7] add probs to lambda --- src/univariate/poisson.jl | 64 +++++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/src/univariate/poisson.jl b/src/univariate/poisson.jl index 104224fdf..0d8d8c5f8 100644 --- a/src/univariate/poisson.jl +++ b/src/univariate/poisson.jl @@ -9,21 +9,6 @@ end @_jl_dist_1p Poisson pois -function entropy(d::Poisson) - λ = d.lambda - if λ < 50.0 - s = 0.0 - for k in 1:100 - s += λ^k * lgamma(k + 1.0) / gamma(k + 1.0) - end - return λ * (1.0 - log(λ)) + exp(-λ) * s - else - return 0.5 * log(2 * pi * e * λ) - - (1 / (12 * λ)) - - (1 / (24 * λ * λ)) - - (19 / (360 * λ * λ * λ)) - end -end isupperbounded(::Union(Poisson, Type{Poisson})) = false islowerbounded(::Union(Poisson, Type{Poisson})) = true @@ -35,12 +20,52 @@ maximum(::Union(Poisson, Type{Poisson})) = Inf insupport(::Poisson, x::Real) = isinteger(x) && zero(x) <= x insupport(::Type{Poisson}, x::Real) = isinteger(x) && zero(x) <= x -kurtosis(d::Poisson) = 1.0 / d.lambda + +function probs(d::Poisson, rgn::UnitRange) + λ = d.lambda + f, l = rgn[1], rgn[end] + 0 <= f <= l || throw(BoundsError()) + r = Array(Float64, l - f + 1) + v = r[1] = pdf(d, f) + if l > f + b = f - 1 + for x = f+1:l + c = λ / x + r[x - b] = (v *= c) + end + end + return r +end mean(d::Poisson) = d.lambda median(d::Poisson) = quantile(d, 0.5) +mode(d::Poisson) = ifloor(d.lambda) +modes(d::Poisson) = isinteger(d.lambda) ? [int(d.lambda)-1,int(d.lambda)] : [ifloor(d.lambda)] + +var(d::Poisson) = d.lambda + +skewness(d::Poisson) = 1.0 / sqrt(d.lambda) + +kurtosis(d::Poisson) = 1.0 / d.lambda + +function entropy(d::Poisson) + λ = d.lambda + if λ < 50.0 + s = 0.0 + for k in 1:100 + s += λ^k * lgamma(k + 1.0) / gamma(k + 1.0) + end + return λ * (1.0 - log(λ)) + exp(-λ) * s + else + return 0.5 * log(2 * pi * e * λ) - + (1 / (12 * λ)) - + (1 / (24 * λ * λ)) - + (19 / (360 * λ * λ * λ)) + end +end + function mgf(d::Poisson, t::Real) l = d.lambda return exp(l * (exp(t) - 1.0)) @@ -51,13 +76,6 @@ function cf(d::Poisson, t::Real) return exp(l * (exp(im * t) - 1.0)) end -mode(d::Poisson) = ifloor(d.lambda) -modes(d::Poisson) = isinteger(d.lambda) ? [int(d.lambda)-1,int(d.lambda)] : [ifloor(d.lambda)] - -skewness(d::Poisson) = 1.0 / sqrt(d.lambda) - -var(d::Poisson) = d.lambda - # model fitting immutable PoissonStats <: SufficientStats From 1efccbeea7355cb90c5a70075fa4337b8230c1aa Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Sat, 8 Nov 2014 17:19:49 +0800 Subject: [PATCH 7/7] minor update of testutils --- src/testutils.jl | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/testutils.jl b/src/testutils.jl index 109939248..07abfbe7b 100644 --- a/src/testutils.jl +++ b/src/testutils.jl @@ -83,10 +83,8 @@ function test_samples(s::Sampleable{Univariate, Discrete}, # the sampleable # check the consistency between probs and pdf if isa(s, Distribution) - if applicable(probs, s, rmin:rmax) - @test_approx_eq probs(s, rmin:rmax) p0 - end - if applicable(probs, s) + @test_approx_eq probs(s, rmin:rmax) p0 + if isbounded(s) @assert isfinite(vmin) && isfinite(vmax) @test_approx_eq probs(s) probs(s, vmin:vmax) end