Skip to content

Commit

Permalink
Merge pull request JuliaLang#212 from JuliaStats/anj/noGPL
Browse files Browse the repository at this point in the history
Remove dependency on StatsFuns
  • Loading branch information
andreasnoack authored Sep 13, 2016
2 parents add47e6 + cba3eb0 commit cc384ef
Show file tree
Hide file tree
Showing 7 changed files with 11 additions and 135 deletions.
3 changes: 1 addition & 2 deletions LICENSE.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
StatsBase.jl is licensed under the MIT License:

> Copyright (c) 2012-2013: Dahua Lin, Simon Byrne, Andreas Noack Jensen,
> Copyright (c) 2012-2016: Dahua Lin, Simon Byrne, Andreas Noack,
> Douglas Bates, John Myles White, Simon Kornblith, and other contributors.
> Permission is hereby granted, free of charge, to any person obtaining
Expand All @@ -21,4 +21,3 @@ StatsBase.jl is licensed under the MIT License:
> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1 change: 0 additions & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
julia 0.4
Compat 0.8.4
StatsFuns 0.3.0
16 changes: 0 additions & 16 deletions docs/source/sampling.rst
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,6 @@ All following functions write results to ``x`` (pre-allocated) and return ``x``.

This algorithm consumes ``k`` random numbers.

.. function:: xmultinom_sample!(a, x)

*Expanded multinomial sampling.*

For each element in ``a``, draw the number of occurrences from a binomial distribution, and fill this element to ``x`` for the chosen number of times. The output values are inherently ordered.

This algorithm consumes ``n`` binomial-distributed random numbers. It is very efficient when ``k`` is considerably greater than ``n``.

.. function:: samplepair(a)

Pick two elements at distinct positions from ``a``, and return them as a pair.
Expand Down Expand Up @@ -160,14 +152,6 @@ All following functions write results to ``x`` (pre-allocated) and return ``x``.

This algorithm takes ``O(n log n)`` time for building the alias table, and then ``O(1)`` to draw each sample. It consumes ``2 k`` random numbers.

.. function:: xmultinom_sample!(a, wv, x)

*Expanded Multinomial sampling.*

Like the ``xmultinom_sample!`` method for non-weighted cases, except that the weights are taking into account when computing the probabilities for drawing from binomial distributions.

This algorithm consumes ``O(n)`` random numbers. Very fast when ``k >> n``.

.. function:: naive_wsample_norep!(a, wv, x)

Naive implementation of weighted sampling without replacement.
Expand Down
20 changes: 1 addition & 19 deletions src/StatsBase.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__precompile__(true)
__precompile__()

module StatsBase
using Compat
Expand All @@ -13,26 +13,8 @@ module StatsBase

## tackle compatibility issues

## import mathfuns, which were migrated to StatsFuns
import StatsFuns: xlogx, xlogy, logistic, logit,
softplus, invsoftplus,
logsumexp, softmax, softmax!

import StatsFuns: RFunctions.binomrand

export

## mathfuns (TODO: removed after a certain period)
xlogx, # x * log(x)
xlogy, # x * log(y)
logistic, # 1 / (1 + exp(-x))
logit, # log(x / (1 - x))
softplus, # log(1 + exp(x))
invsoftplus, # log(exp(x) - 1)
logsumexp, # log(exp(x) + exp(y)) or log(sum(exp(x)))
softmax,
softmax!,

## weights
WeightVec, # the type to represent a weight vector
weights, # construct a weight vector
Expand Down
93 changes: 7 additions & 86 deletions src/sampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -37,41 +37,6 @@ function direct_sample!(a::AbstractArray, x::AbstractArray)
return x
end

# Expanded Multinomial sampling
#
# for each element in a, we draw the number of its
# occurrences, and fill this element to x for
# this number of times.
#
function xmultinom_sample!(a::AbstractArray, x::AbstractArray)
n = length(a)
k = length(x)
offset = 0
i = 1
while offset < k
rk = k - offset
if i == n
@inbounds ai = a[i]
for j = 1:rk
@inbounds x[offset + j] = ai
end
offset = k
else
m = Int(binomrand(rk, 1.0 / (n - i + 1)))
if m > 0
@inbounds ai = a[i]
for j = 1:m
@inbounds x[offset + j] = a[i]
end
offset += m
end
i += 1
end
end
x
end


### draw a pair of distinct integers in [1:n]

"""
Expand Down Expand Up @@ -310,11 +275,7 @@ function sample!(a::AbstractArray, x::AbstractArray; replace::Bool=true, ordered

if replace # with replacement
if ordered
if k > 10 * n
xmultinom_sample!(a, x)
else
sort!(direct_sample!(a, x))
end
sort!(direct_sample!(a, x))
else
direct_sample!(a, x)
end
Expand Down Expand Up @@ -485,38 +446,6 @@ function alias_sample!(a::AbstractArray, wv::WeightVec, x::AbstractArray)
return x
end

function xmultinom_sample!(a::AbstractArray, wv::WeightVec, x::AbstractArray)
n = length(a)
length(wv) == n || throw(DimensionMismatch("Inconsistent lengths."))
k = length(x)
offset = 0
i = 1
wsum = sum(wv)
w = values(wv)

while offset < k
rk = k - offset
wi = w[i]

if i == n || wi >= wsum
for j = 1 : rk
@inbounds x[offset + j] = a[i]
end
offset = k
else
m = Int(binomrand(rk, wi / wsum))
for j = 1 : m
@inbounds x[offset + j] = a[i]
end
i += 1
wsum -= wi
offset += m
end
end
return x
end


function naive_wsample_norep!(a::AbstractArray, wv::WeightVec, x::AbstractArray)
n = length(a)
length(wv) == n || throw(DimensionMismatch("Inconsistent lengths."))
Expand Down Expand Up @@ -549,24 +478,16 @@ function sample!(a::AbstractArray, wv::WeightVec, x::AbstractArray;

if replace
if ordered
if k < n && k < 256
sort!(direct_sample!(a, wv, x))
else
xmultinom_sample!(a, wv, x)
end
sort!(direct_sample!(a, wv, x))
else
if k > 3 * n
shuffle!(xmultinom_sample!(a, wv, x))
if n < 40
direct_sample!(a, wv, x)
else
if n < 40
t = ifelse(n < 500, 64, 32)
if k < t
direct_sample!(a, wv, x)
else
t = ifelse(n < 500, 64, 32)
if k < t
direct_sample!(a, wv, x)
else
alias_sample!(a, wv, x)
end
alias_sample!(a, wv, x)
end
end
end
Expand Down
8 changes: 1 addition & 7 deletions test/sampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ function check_sample_wrep(a::AbstractArray, vrgn, ptol::Real; ordered::Bool=fal
end
end

import StatsBase: direct_sample!, xmultinom_sample!
import StatsBase: direct_sample!

a = direct_sample!(1:10, zeros(Int, n, 3))
check_sample_wrep(a, (1, 10), 5.0e-3; ordered=false)
Expand All @@ -58,12 +58,6 @@ check_sample_wrep(a, (3, 12), 5.0e-3; ordered=false)
a = direct_sample!([11:20;], zeros(Int, n, 3))
check_sample_wrep(a, (11, 20), 5.0e-3; ordered=false)

a = xmultinom_sample!(3:12, zeros(Int, n))
check_sample_wrep(a, (3, 12), 5.0e-3; ordered=true)

a = xmultinom_sample!(101:200, zeros(Int, 10))
check_sample_wrep(a, (101, 200), 0; ordered=true)

a = sample(3:12, n)
check_sample_wrep(a, (3, 12), 5.0e-3; ordered=false)

Expand Down
5 changes: 1 addition & 4 deletions test/wsampling.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ function check_wsample_wrep(a::AbstractArray, vrgn, wv::WeightVec, ptol::Real; o
end
end

import StatsBase: direct_sample!, alias_sample!, xmultinom_sample!
import StatsBase: direct_sample!, alias_sample!

n = 10^5
wv = weights([0.2, 0.8, 0.4, 0.6])
Expand All @@ -43,9 +43,6 @@ check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false)
a = alias_sample!(4:7, wv, zeros(Int, n, 3))
check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false)

a = xmultinom_sample!(4:7, wv, zeros(Int, n))
check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=true)

a = sample(4:7, wv, n; ordered=false)
check_wsample_wrep(a, (4, 7), wv, 5.0e-3; ordered=false)

Expand Down

0 comments on commit cc384ef

Please sign in to comment.