Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring reducedim and related functions #7106

Merged
merged 4 commits into from
Jun 5, 2014
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions base/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1385,8 +1385,7 @@ end

## Reductions ##

sum(A::BitArray, region) = reducedim(+, A, region, 0, Array(Int,reduced_dims(A,region)))

sum(A::BitArray, region) = reducedim(AddFun(), A, region)
sum(B::BitArray) = countnz(B)

function all(B::BitArray)
Expand Down
309 changes: 162 additions & 147 deletions base/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,11 @@ reduced_dims(a::AbstractArray, region) = reduced_dims(size(a), region)

# for reductions that keep 0 dims as 0
reduced_dims0(a::AbstractArray, region) = reduced_dims0(size(a), region)

reduced_dims{N}(siz::NTuple{N,Int}, d::Int, rd::Int) = (d == 1 ? tuple(rd, siz[d+1:N]...) :
d == N ? tuple(siz[1:N-1]..., rd) :
1 < d < N ? tuple(siz[1:d-1]..., rd, siz[d+1:N]...) :
siz)::typeof(siz)

reduced_dims{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, 1)

reduced_dims0{N}(siz::NTuple{N,Int}, d::Int) = 1 <= d <= N ? reduced_dims(siz, d, (siz[d] == 0 ? 0 : 1)) : siz

function reduced_dims{N}(siz::NTuple{N,Int}, region)
Expand Down Expand Up @@ -46,184 +43,200 @@ end

###### Generic reduction functions #####

reducedim(f::Function, A, region, initial) = reducedim!(f, reduction_init(A, region, initial), A)

reducedim(f::Function, A, region, initial, R) = reducedim!(f, fill!(R, initial), A)

function reducedim!_function(N::Int, f::Function)
body = gen_reduction_body(N, f)
@eval begin
local _F_
function _F_(R, A)
$body
end
_F_
end
end

let reducedim_cache = Dict()
# reducedim! assumes that R has already been initialized with a seed value
global reducedim!
function reducedim!(f::Function, R, A)
if isempty(R)
return R
end
ndimsA = ndims(A)
key = (ndimsA, f)
if !haskey(reducedim_cache,key)
func = reducedim!_function(ndimsA, f)
reducedim_cache[key] = func
else
func = reducedim_cache[key]
end
func(R, A)::typeof(R)
end
end # let reducedim_cache
## initialization

# Generate the body for a reduction function reduce!(f, R, A), using binary operation f,
# where R is the output and A is the input.
# R must have already been set to the appropriate size and initialized with the seed value
function gen_reduction_body(N, f::Function)
F = Expr(:quote, f)
quote
(isempty(R) || isempty(A)) && return R
for i = 1:$N
(size(R, i) == size(A, i) || size(R, i) == 1) || throw(DimensionMismatch("Reduction on array of size $(size(A)) with output of size $(size(R))"))
end
@nextract $N sizeR d->size(R,d)
# If we're reducing along dimension 1, for efficiency we can make use of a temporary.
# Otherwise, keep the result in R so that we traverse A in storage order.
if size(R, 1) < size(A, 1)
@nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin
@inbounds tmp = (@nref $N R j)
for i_1 = 1:size(A,1)
@inbounds tmp = ($F)(tmp, (@nref $N A i))
end
@inbounds (@nref $N R j) = tmp
end
else
@nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin
@inbounds (@nref $N R j) = ($F)((@nref $N R j), (@nref $N A i))
end
end
R
end
for (Op, initfun) in ((:AddFun, :zero), (:MulFun, :one), (:MaxFun, :typemin), (:MinFun, :typemax))
@eval initarray!{T}(a::AbstractArray{T}, ::$(Op), init::Bool) = (init && fill!(a, $(initfun)(T)); a)
end

reduction_init{T}(A::AbstractArray, region, initial::T, Tr=T) = fill!(similar(A,Tr,reduced_dims(A,region)), initial)

function initarray!{T}(a::AbstractArray{T}, v::T, init::Bool)
if init
fill!(a, v)
end
return a
for (Op, initval) in ((:AndFun, true), (:OrFun, false))
@eval initarray!(a::AbstractArray, ::$(Op), init::Bool) = (init && fill!(a, $initval); a)
end

reducedim_initarray{R}(A::AbstractArray, region, v0, ::Type{R}) = fill!(similar(A,R,reduced_dims(A,region)), v0)
reducedim_initarray{T}(A::AbstractArray, region, v0::T) = reducedim_initarray(A, region, v0, T)

##### Specific reduction functions #####
reducedim_initarray0{R}(A::AbstractArray, region, v0, ::Type{R}) = fill!(similar(A,R,reduced_dims0(A,region)), v0)
reducedim_initarray0{T}(A::AbstractArray, region, v0::T) = reducedim_initarray0(A, region, v0, T)

## sum

@ngenerate N typeof(R) function _sum!{T,N}(f, R::AbstractArray, A::AbstractArray{T,N})
(isempty(R) || isempty(A)) && return R
rdims = 0
for i = 1:N
if size(R, i) == size(A, i)
elseif size(R, i) == 1
rdims += 1
else
throw(DimensionMismatch("sum of array of size $(size(A)) with output of size $(size(R))"))
end
end
@nextract N sizeR d->size(R,d)
# If we're reducing along dimension 1 and dimension 1 is
# sufficiently large, use the pairwise implementation. Otherwise,
# keep the result in R so that we traverse A in storage order.
sz1 = size(A, 1)
if size(R, 1) < sz1 && sz1 >= 16 && rdims == 1
for i = 1:div(length(A), sz1)
@inbounds R[i] = mapreduce_impl(f, AddFun(), A, (i-1)*sz1+1, i*sz1)
end
# TODO: better way to handle reducedim initialization
#
# The current scheme is basically following Steven G. Johnson's original implementation
#
function reducedim_init{T}(f, op::AddFun, A::AbstractArray{T}, region)
if method_exists(zero, (Type{T},))
x = evaluate(f, zero(T))
z = zero(x) + zero(x)
Tr = typeof(z) == typeof(x) && !isbits(T) ? T : typeof(z)
else
@nloops N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin
@inbounds (@nref N R j) += evaluate(f, @nref N A i)
end
z = zero(sum(f, A))
Tr = typeof(z)
end
R
return reducedim_initarray(A, region, z, Tr)
end

function sum{T}(f::Union(Function,Func{1}), A::AbstractArray{T}, region)
function reducedim_init{T}(f, op::MulFun, A::AbstractArray{T}, region)
if method_exists(zero, (Type{T},))
fz = evaluate(f, zero(T))
z = fz + fz
Tr = typeof(z) == typeof(fz) && !isbits(T) ? T : typeof(z)
x = evaluate(f, zero(T))
z = one(x) * one(x)
Tr = typeof(z) == typeof(x) && !isbits(T) ? T : typeof(z)
else
# TODO: handle more heterogeneous sums. e.g. sum(A, 1) where
# A is a Matrix{Any} with one column of numbers and one of vectors
z = zero(sum(f, A))
z = one(prod(f, A))
Tr = typeof(z)
end
_sum!(f, reduction_init(A, region, z, Tr), A)
return reducedim_initarray(A, region, z, Tr)
end

sum!{R}(f::Union(Function,Func{1}), r::AbstractArray{R}, A::AbstractArray; init::Bool=true) =
_sum!(f, initarray!(r, zero(R), init), A)
reducedim_init{T}(f, op::MaxFun, A::AbstractArray{T}, region) = reducedim_initarray0(A, region, typemin(evaluate(f, zero(T))))
reducedim_init{T}(f, op::MinFun, A::AbstractArray{T}, region) = reducedim_initarray0(A, region, typemax(evaluate(f, zero(T))))
reducedim_init{T}(f::Union(AbsFun,Abs2Fun), op::MaxFun, A::AbstractArray{T}, region) =
reducedim_initarray(A, region, zero(evaluate(f, zero(T))))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be reducedim_initarray0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When a is empty, zero is a reasonable result for maxabs(a).


reducedim_init(f, op::AndFun, A::AbstractArray, region) = reducedim_initarray(A, region, true)
reducedim_init(f, op::OrFun, A::AbstractArray, region) = reducedim_initarray(A, region, false)

# specialize to make initialization more efficient for common cases

typealias CommonReduceResult Union(Uint64,Uint128,Int64,Int128,Float32,Float64,Complex64,Complex128)

for (fname, func) in ((:sum, :IdFun), (:sumabs, :AbsFun), (:sumabs2, :Abs2Fun))
for (IT, RT) in ((:CommonReduceResult, :T), (:SmallSigned, :Int), (:SmallUnsigned, :Uint))
@eval begin
$fname(A::AbstractArray, region) = sum($func(), A, region)
$(symbol("$(fname)!")){R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) =
sum!($func(), r, A; init=init)
reducedim_init{T<:$IT}(f::Union(IdFun,AbsFun,Abs2Fun), op::AddFun, A::AbstractArray{T}, region) =
reducedim_initarray(A, region, zero($RT))
reducedim_init{T<:$IT}(f::Union(IdFun,AbsFun,Abs2Fun), op::MulFun, A::AbstractArray{T}, region) =
reducedim_initarray(A, region, one($RT))
end
end
reducedim_init(f::Union(IdFun,AbsFun,Abs2Fun), op::AddFun, A::AbstractArray{Bool}, region) =
reducedim_initarray(A, region, 0)


## generic (map)reduction

has_fast_linear_indexing(a::AbstractArray) = false
has_fast_linear_indexing(a::Array) = true

function check_reducdims(R, A)
# Check whether R has compatible dimensions w.r.t. A for reduction
#
# It returns an integer value value (useful for choosing implementation)
# - If it reduces only along leading dimensions, e.g. sum(A, 1) or sum(A, (1, 2)),
# it returns the length of the leading slice. For the two examples above,
# it will be size(A, 1) or size(A, 1) * size(A, 2).
# - Otherwise, e.g. sum(A, 2) or sum(A, (1, 3)), it returns 0.
#
lsiz = 1
had_nonreduc = false
for i = 1:ndims(A)
sRi = size(R, i)
sAi = size(A, i)
if sRi == 1
if sAi > 1
if had_nonreduc
lsiz = 0 # to reduce along i, but some previous dimensions were non-reducing
else
lsiz *= sAi # if lsiz was set to zero, it will stay to be zero
end
end
else
sRi == sAi ||
throw(DimensionMismatch("Reduction on array of size $(size(A)) with output of size $(size(R))"))
had_nonreduc = true
end
end
return lsiz
end

function mapreducedim_body(N::Int)
quote
lsiz = check_reducdims(R, A)
isempty(A) && return R
@nextract $N sizeR d->size(R,d)
sizA1 = size(A, 1)

if has_fast_linear_indexing(A) && lsiz > 16
# use mapreduce_impl, which is probably better tuned to achieve higher performance
nslices = div(length(A), lsiz)
ibase = 0
for i = 1:nslices
@inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz)
ibase += lsiz
end
elseif size(R, 1) == 1 && sizA1 > 1
# keep the accumulator as a local variable when reducing along the first dimension
@nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin
@inbounds r = (@nref $N R j)
for i_1 = 1:sizA1
@inbounds v = evaluate(f, (@nref $N A i))
r = evaluate(op, r, v)
end
@inbounds (@nref $N R j) = r
end
else
# general implementation
@nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin
@inbounds v = evaluate(f, (@nref $N A i))
@inbounds (@nref $N R j) = evaluate(op, (@nref $N R j), v)
end
end
return R
end
end
eval(ngenerate(:N, :(typeof(R)),
:(_mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N})), mapreducedim_body))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can be simplified to use the standard syntax for @ngenerate instead of eval(ngenerate(.... See the Cartesian docs and multidimensional.jl.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is doable. Just that personally, I feel it is more organized (and easier to debug) to have a separated body-generating function (especially when the body generating code is substantial).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

macroexpand makes it less important to have a separate body-generating function. The main reason for the pain of eval(ngenerate(... is when your dict lookup sometimes has to be more complicated than just the dimensionality.


## prod

eval(ngenerate(:N, :(typeof(R)), :(_prod!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, *)))
prod!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _prod!(initarray!(r, one(R), init), A)
mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = _mapreducedim!(f, op, R, A)

function prod{T}(A::AbstractArray{T}, region)
if method_exists(one, (Type{T},))
z = one(T) * one(T)
Tr = typeof(z) == typeof(one(T)) ? T : typeof(z)
else
# TODO: handle more heterogeneous products. e.g. prod(A, 1) where
# A is a Matrix{Any} with one column of numbers and one of vectors
z = one(prod(A))
Tr = typeof(z)
end
_prod!(reduction_init(A, region, z, Tr), A)
function mapreducedim!(f::Function, op, R::AbstractArray, A::AbstractArray)
is(op, +) ? _mapreducedim!(f, AddFun(), R, A) :
is(op, *) ? _mapreducedim!(f, MulFun(), R, A) :
is(op, &) ? _mapreducedim!(f, AndFun(), R, A) :
is(op, |) ? _mapreducedim!(f, OrFun(), R, A) :
_mapreducedim!(f, op, R, A)
end

prod(A::AbstractArray{Bool}, region) = error("use all() instead of prod() for boolean arrays")
reducedim!{RT}(op, R::AbstractArray{RT}, A::AbstractArray) = mapreducedim!(IdFun(), op, R, A, zero(RT))

mapreducedim(f, op, A::AbstractArray, region, v0) = mapreducedim!(f, op, reducedim_initarray(A, region, v0), A)
mapreducedim{T}(f, op, A::AbstractArray{T}, region) = mapreducedim!(f, op, reducedim_init(f, op, A, region), A)

## maximum & minimum
reducedim(op, A::AbstractArray, region, v0) = mapreducedim(IdFun(), op, A, region, v0)
reducedim(op, A::AbstractArray, region) = mapreducedim(IdFun(), op, A, region)

eval(ngenerate(:N, :(typeof(R)), :(_maximum!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, scalarmax)))
maximum!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _maximum!(initarray!(r, typemin(R), init), A)
maximum{T}(A::AbstractArray{T}, region) =
isempty(A) ? similar(A,reduced_dims0(A,region)) : _maximum!(reduction_init(A, region, typemin(T)), A)

eval(ngenerate(:N, :(typeof(R)), :(_minimum!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, scalarmin)))
minimum!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _minimum!(initarray!(r, typemax(R), init), A)
minimum{T}(A::AbstractArray{T}, region) =
isempty(A) ? similar(A, reduced_dims0(A, region)) : _minimum!(reduction_init(A, region, typemax(T)), A)
##### Specific reduction functions #####

for (fname, Op) in [(:sum, :AddFun), (:prod, :MulFun),
(:maximum, :MaxFun), (:minimum, :MinFun),
(:all, :AndFun), (:any, :OrFun)]

## all & any
fname! = symbol(string(fname, '!'))
@eval begin
$(fname!)(f::Union(Function,Func{1}), r::AbstractArray, A::AbstractArray; init::Bool=true) =
mapreducedim!(f, $(Op)(), initarray!(r, $(Op)(), init), A)
$(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(IdFun(), r, A; init=init)

eval(ngenerate(:N, :(typeof(R)), :(_all!{N}(R::AbstractArray, A::AbstractArray{Bool,N})), N->gen_reduction_body(N, &)))
all!(r::AbstractArray, A::AbstractArray{Bool}; init::Bool=true) = _all!(initarray!(r, true, init), A)
all(A::AbstractArray{Bool}, region) = _all!(reduction_init(A, region, true), A)
$(fname)(f::Union(Function,Func{1}), A::AbstractArray, region) =
mapreducedim(f, $(Op)(), A, region)
$(fname)(A::AbstractArray, region) = $(fname)(IdFun(), A, region)
end
end

eval(ngenerate(:N, :(typeof(R)), :(_any!{N}(R::AbstractArray, A::AbstractArray{Bool,N})), N->gen_reduction_body(N, |)))
any!(r::AbstractArray, A::AbstractArray{Bool}; init::Bool=true) = _any!(initarray!(r, false, init), A)
any(A::AbstractArray{Bool}, region) = _any!(reduction_init(A, region, false), A)
for (fname, fbase, Fun) in [(:sumabs, :sum, :AbsFun),
(:sumabs2, :sum, :Abs2Fun),
(:maxabs, :maximum, :AbsFun),
(:minabs, :minimum, :AbsFun)]
fname! = symbol(string(fname, '!'))
fbase! = symbol(string(fbase, '!'))
@eval begin
$(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) =
$(fbase!)($(Fun)(), r, A; init=init)
$(fname)(A::AbstractArray, region) = $(fbase)($(Fun)(), A, region)
end
end


## findmin & findmax
##### findmin & findmax #####

# Generate the body for a reduction function reduce!(f, Rval, Rind, A), using a comparison operator f
# Rind contains the index of A from which Rval was taken
Expand Down Expand Up @@ -272,10 +285,12 @@ eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmin!{T,N}(Rval::AbstractArray,
findmin!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmin!(initarray!(rval, typemax(R), init), rind, A)
findmin{T}(A::AbstractArray{T}, region) =
isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) :
_findmin!(reduction_init(A, region, typemax(T)), zeros(Int,reduced_dims0(A,region)), A)
_findmin!(reducedim_initarray0(A, region, typemax(T)), zeros(Int,reduced_dims0(A,region)), A)

eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmax!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N})), N->gen_findreduction_body(N, >)))
findmax!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmax!(initarray!(rval, typemin(R), init), rind, A)
findmax{T}(A::AbstractArray{T}, region) =
isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) :
_findmax!(reduction_init(A, region, typemin(T)), zeros(Int,reduced_dims0(A,region)), A)
_findmax!(reducedim_initarray0(A, region, typemin(T)), zeros(Int,reduced_dims0(A,region)), A)


4 changes: 2 additions & 2 deletions test/reducedim.jl
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ safe_sumabs2{T}(A::Array{T}, region) = safe_mapslices(sum, abs2(A), region)

Areduc = rand(3, 4, 5, 6)
for region in {
1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4),
(1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)}

# println("region = $region")
r = fill(NaN, Base.reduced_dims(size(Areduc), region))
@test_approx_eq sum!(r, Areduc) safe_sum(Areduc, region)
@test_approx_eq prod!(r, Areduc) safe_prod(Areduc, region)
Expand Down