From b3f52189c6e766ea3be7a469f481c87523f0a782 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Tue, 3 Jun 2014 15:47:23 -0500 Subject: [PATCH 1/4] Refactoring reducedim and related functions --- base/bitarray.jl | 3 +- base/reducedim.jl | 311 ++++++++++++++++++++++++---------------------- test/reducedim.jl | 4 +- 3 files changed, 167 insertions(+), 151 deletions(-) diff --git a/base/bitarray.jl b/base/bitarray.jl index b17a2c11c3290..e3a523dc9ff33 100644 --- a/base/bitarray.jl +++ b/base/bitarray.jl @@ -1385,8 +1385,7 @@ end ## Reductions ## -sum(A::BitArray, region) = reducedim(+, A, region, 0, Array(Int,reduced_dims(A,region))) - +sum(A::BitArray, region) = reducedim(AddFun(), A, region) sum(B::BitArray) = countnz(B) function all(B::BitArray) diff --git a/base/reducedim.jl b/base/reducedim.jl index 5e562ace3f1e8..807e153f6cc5a 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -5,14 +5,11 @@ reduced_dims(a::AbstractArray, region) = reduced_dims(size(a), region) # for reductions that keep 0 dims as 0 reduced_dims0(a::AbstractArray, region) = reduced_dims0(size(a), region) - reduced_dims{N}(siz::NTuple{N,Int}, d::Int, rd::Int) = (d == 1 ? tuple(rd, siz[d+1:N]...) : d == N ? tuple(siz[1:N-1]..., rd) : 1 < d < N ? tuple(siz[1:d-1]..., rd, siz[d+1:N]...) : siz)::typeof(siz) - reduced_dims{N}(siz::NTuple{N,Int}, d::Int) = reduced_dims(siz, d, 1) - reduced_dims0{N}(siz::NTuple{N,Int}, d::Int) = 1 <= d <= N ? reduced_dims(siz, d, (siz[d] == 0 ? 0 : 1)) : siz function reduced_dims{N}(siz::NTuple{N,Int}, region) @@ -46,184 +43,202 @@ end ###### Generic reduction functions ##### -reducedim(f::Function, A, region, initial) = reducedim!(f, reduction_init(A, region, initial), A) +## initialization -reducedim(f::Function, A, region, initial, R) = reducedim!(f, fill!(R, initial), A) +for (Op, initfun) in ((:AddFun, :zero), (:MulFun, :one), (:MaxFun, :typemin), (:MinFun, :typemax)) + @eval initarray!{T}(a::AbstractArray{T}, ::$(Op), init::Bool) = (init && fill!(a, $(initfun)(T)); a) +end -function reducedim!_function(N::Int, f::Function) - body = gen_reduction_body(N, f) - @eval begin - local _F_ - function _F_(R, A) - $body - end - _F_ - end +for (Op, initval) in ((:AndFun, true), (:OrFun, false)) + @eval initarray!(a::AbstractArray, ::$(Op), init::Bool) = (init && fill!(a, $initval); a) end -let reducedim_cache = Dict() -# reducedim! assumes that R has already been initialized with a seed value -global reducedim! -function reducedim!(f::Function, R, A) - if isempty(R) - return R +reducedim_initarray{R}(A::AbstractArray, region, v0, ::Type{R}) = fill!(similar(A,R,reduced_dims(A,region)), v0) +reducedim_initarray{T}(A::AbstractArray, region, v0::T) = reducedim_initarray(A, region, v0, T) + +reducedim_initarray0{R}(A::AbstractArray, region, v0, ::Type{R}) = fill!(similar(A,R,reduced_dims0(A,region)), v0) +reducedim_initarray0{T}(A::AbstractArray, region, v0::T) = reducedim_initarray0(A, region, v0, T) + +# TODO: better way to handle reducedim initialization +# +# The current scheme is basically following Steven G. Johnson's original implementation +# +function reducedim_init{T}(f, op::AddFun, A::AbstractArray{T}, region) + if method_exists(zero, (Type{T},)) + x = evaluate(f, zero(T)) + z = zero(x) + zero(x) + Tr = typeof(z) == typeof(x) && !isbits(T) ? T : typeof(z) + else + z = zero(sum(f, A)) + Tr = typeof(z) end - ndimsA = ndims(A) - key = (ndimsA, f) - if !haskey(reducedim_cache,key) - func = reducedim!_function(ndimsA, f) - reducedim_cache[key] = func + return reducedim_initarray(A, region, z, Tr) +end + +function reducedim_init{T}(f, op::MulFun, A::AbstractArray{T}, region) + if method_exists(zero, (Type{T},)) + x = evaluate(f, zero(T)) + z = one(x) * one(x) + Tr = typeof(z) == typeof(x) && !isbits(T) ? T : typeof(z) else - func = reducedim_cache[key] + z = one(prod(f, A)) + Tr = typeof(z) end - func(R, A)::typeof(R) + return reducedim_initarray(A, region, z, Tr) end -end # let reducedim_cache -# Generate the body for a reduction function reduce!(f, R, A), using binary operation f, -# where R is the output and A is the input. -# R must have already been set to the appropriate size and initialized with the seed value -function gen_reduction_body(N, f::Function) - F = Expr(:quote, f) - quote - (isempty(R) || isempty(A)) && return R - for i = 1:$N - (size(R, i) == size(A, i) || size(R, i) == 1) || throw(DimensionMismatch("Reduction on array of size $(size(A)) with output of size $(size(R))")) - end - @nextract $N sizeR d->size(R,d) - # If we're reducing along dimension 1, for efficiency we can make use of a temporary. - # Otherwise, keep the result in R so that we traverse A in storage order. - if size(R, 1) < size(A, 1) - @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds tmp = (@nref $N R j) - for i_1 = 1:size(A,1) - @inbounds tmp = ($F)(tmp, (@nref $N A i)) +reducedim_init{T}(f, op::MaxFun, A::AbstractArray{T}, region) = reducedim_initarray0(A, region, typemin(evaluate(f, zero(T)))) +reducedim_init{T}(f, op::MinFun, A::AbstractArray{T}, region) = reducedim_initarray0(A, region, typemax(evaluate(f, zero(T)))) +reducedim_init{T}(f::Union(AbsFun,Abs2Fun), op::MaxFun, A::AbstractArray{T}, region) = + reducedim_initarray(A, region, zero(evaluate(f, zero(T)))) + +reducedim_init(f, op::AndFun, A::AbstractArray, region) = reducedim_initarray(A, region, true) +reducedim_init(f, op::OrFun, A::AbstractArray, region) = reducedim_initarray(A, region, false) + + +## generic (map)reduction + +has_fast_linear_indexing(a::AbstractArray) = false +has_fast_linear_indexing(a::Array) = true + +function check_reducdims(R, A) + # Check whether R has compatible dimensions w.r.t. A for reduction + # + # It returns an integer value value (useful for choosing implementation) + # - If it reduces only along leading dimensions, e.g. sum(A, 1) or sum(A, (1, 2)), + # it returns the length of the leading slice. For the two examples above, + # it will be size(A, 1) or size(A, 1) * size(A, 2). + # - Otherwise, e.g. sum(A, 2) or sum(A, (1, 3)), it returns 0. + # + lsiz = 1 + had_nonreduc = false + for i = 1:ndims(A) + sRi = size(R, i) + sAi = size(A, i) + if sRi == 1 + if sAi > 1 + if had_nonreduc + lsiz = 0 # to reduce along i, but some previous dimensions were non-reducing + else + lsiz *= sAi # if lsiz was set to zero, it will stay to be zero end - @inbounds (@nref $N R j) = tmp end else - @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds (@nref $N R j) = ($F)((@nref $N R j), (@nref $N A i)) - end + sRi == sAi || + throw(DimensionMismatch("Reduction on array of size $(size(A)) with output of size $(size(R))")) + had_nonreduc = true end - R end + return lsiz end -reduction_init{T}(A::AbstractArray, region, initial::T, Tr=T) = fill!(similar(A,Tr,reduced_dims(A,region)), initial) - -function initarray!{T}(a::AbstractArray{T}, v::T, init::Bool) - if init - fill!(a, v) - end - return a -end - - -##### Specific reduction functions ##### - -## sum - -@ngenerate N typeof(R) function _sum!{T,N}(f, R::AbstractArray, A::AbstractArray{T,N}) - (isempty(R) || isempty(A)) && return R - rdims = 0 - for i = 1:N - if size(R, i) == size(A, i) - elseif size(R, i) == 1 - rdims += 1 - else - throw(DimensionMismatch("sum of array of size $(size(A)) with output of size $(size(R))")) - end - end - @nextract N sizeR d->size(R,d) - # If we're reducing along dimension 1 and dimension 1 is - # sufficiently large, use the pairwise implementation. Otherwise, - # keep the result in R so that we traverse A in storage order. - sz1 = size(A, 1) - if size(R, 1) < sz1 && sz1 >= 16 && rdims == 1 - for i = 1:div(length(A), sz1) - @inbounds R[i] = mapreduce_impl(f, AddFun(), A, (i-1)*sz1+1, i*sz1) - end - else - @nloops N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds (@nref N R j) += evaluate(f, @nref N A i) +function mapreducedim!_func(N::Int) + @eval begin + local _F_ + function _F_(f, op, R, A) + lsiz = check_reducdims(R, A) + isempty(A) && return R + @nextract $N sizeR d->size(R,d) + sizA1 = size(A, 1) + + if has_fast_linear_indexing(A) && lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz) + ibase += lsiz + end + elseif size(R, 1) == 1 && sizA1 > 1 + # keep the accumulator as a local variable when reducing along the first dimension + @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds r = (@nref $N R j) + for i_1 = 1:sizA1 + @inbounds v = evaluate(f, (@nref $N A i)) + r = evaluate(op, r, v) + end + @inbounds (@nref $N R j) = r + end + else + # general implementation + @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds v = evaluate(f, (@nref $N A i)) + @inbounds (@nref $N R j) = evaluate(op, (@nref $N R j), v) + end + end + return R end - end - R + _F_ + end # @eval end -function sum{T}(f::Union(Function,Func{1}), A::AbstractArray{T}, region) - if method_exists(zero, (Type{T},)) - fz = evaluate(f, zero(T)) - z = fz + fz - Tr = typeof(z) == typeof(fz) && !isbits(T) ? T : typeof(z) +let mapreducedim_fcache = (Int=>Function)[] +global _mapreducedim! +function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) + isempty(R) && return R + ndimsA = ndims(A) + if !haskey(mapreducedim_fcache, ndimsA) + func! = mapreducedim!_func(ndimsA) + mapreducedim_fcache[ndimsA] = func! else - # TODO: handle more heterogeneous sums. e.g. sum(A, 1) where - # A is a Matrix{Any} with one column of numbers and one of vectors - z = zero(sum(f, A)) - Tr = typeof(z) + func! = mapreducedim_fcache[ndimsA] end - _sum!(f, reduction_init(A, region, z, Tr), A) + func!(f, op, R, A)::typeof(R) end +end # let mapreducedim_fcache -sum!{R}(f::Union(Function,Func{1}), r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = - _sum!(f, initarray!(r, zero(R), init), A) +mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = _mapreducedim!(f, op, R, A) -for (fname, func) in ((:sum, :IdFun), (:sumabs, :AbsFun), (:sumabs2, :Abs2Fun)) - @eval begin - $fname(A::AbstractArray, region) = sum($func(), A, region) - $(symbol("$(fname)!")){R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = - sum!($func(), r, A; init=init) - end +function mapreducedim!(f::Function, op, R::AbstractArray, A::AbstractArray) + is(op, +) ? _mapreducedim!(f, AddFun(), R, A) : + is(op, *) ? _mapreducedim!(f, MulFun(), R, A) : + is(op, &) ? _mapreducedim!(f, AndFun(), R, A) : + is(op, |) ? _mapreducedim!(f, OrFun(), R, A) : + _mapreducedim!(f, op, R, A) end +reducedim!{RT}(op, R::AbstractArray{RT}, A::AbstractArray) = mapreducedim!(IdFun(), op, R, A, zero(RT)) -## prod - -eval(ngenerate(:N, :(typeof(R)), :(_prod!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, *))) -prod!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _prod!(initarray!(r, one(R), init), A) - -function prod{T}(A::AbstractArray{T}, region) - if method_exists(one, (Type{T},)) - z = one(T) * one(T) - Tr = typeof(z) == typeof(one(T)) ? T : typeof(z) - else - # TODO: handle more heterogeneous products. e.g. prod(A, 1) where - # A is a Matrix{Any} with one column of numbers and one of vectors - z = one(prod(A)) - Tr = typeof(z) - end - _prod!(reduction_init(A, region, z, Tr), A) -end - -prod(A::AbstractArray{Bool}, region) = error("use all() instead of prod() for boolean arrays") +mapreducedim(f, op, A::AbstractArray, region, v0) = mapreducedim!(f, op, reducedim_initarray(A, region, v0), A) +mapreducedim{T}(f, op, A::AbstractArray{T}, region) = mapreducedim!(f, op, reducedim_init(f, op, A, region), A) +reducedim(op, A::AbstractArray, region, v0) = mapreducedim(IdFun(), op, A, region, v0) +reducedim(op, A::AbstractArray, region) = mapreducedim(IdFun(), op, A, region) -## maximum & minimum -eval(ngenerate(:N, :(typeof(R)), :(_maximum!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, scalarmax))) -maximum!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _maximum!(initarray!(r, typemin(R), init), A) -maximum{T}(A::AbstractArray{T}, region) = - isempty(A) ? similar(A,reduced_dims0(A,region)) : _maximum!(reduction_init(A, region, typemin(T)), A) - -eval(ngenerate(:N, :(typeof(R)), :(_minimum!{T,N}(R::AbstractArray, A::AbstractArray{T,N})), N->gen_reduction_body(N, scalarmin))) -minimum!{R}(r::AbstractArray{R}, A::AbstractArray; init::Bool=true) = _minimum!(initarray!(r, typemax(R), init), A) -minimum{T}(A::AbstractArray{T}, region) = - isempty(A) ? similar(A, reduced_dims0(A, region)) : _minimum!(reduction_init(A, region, typemax(T)), A) +##### Specific reduction functions ##### +for (fname, Op) in [(:sum, :AddFun), (:prod, :MulFun), + (:maximum, :MaxFun), (:minimum, :MinFun), + (:all, :AndFun), (:any, :OrFun)] -## all & any + fname! = symbol(string(fname, '!')) + @eval begin + $(fname!)(f::Union(Function,Func{1}), r::AbstractArray, A::AbstractArray; init::Bool=true) = + mapreducedim!(f, $(Op)(), initarray!(r, $(Op)(), init), A) + $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(IdFun(), r, A; init=init) -eval(ngenerate(:N, :(typeof(R)), :(_all!{N}(R::AbstractArray, A::AbstractArray{Bool,N})), N->gen_reduction_body(N, &))) -all!(r::AbstractArray, A::AbstractArray{Bool}; init::Bool=true) = _all!(initarray!(r, true, init), A) -all(A::AbstractArray{Bool}, region) = _all!(reduction_init(A, region, true), A) + $(fname)(f::Union(Function,Func{1}), A::AbstractArray, region) = + mapreducedim(f, $(Op)(), A, region) + $(fname)(A::AbstractArray, region) = $(fname)(IdFun(), A, region) + end +end -eval(ngenerate(:N, :(typeof(R)), :(_any!{N}(R::AbstractArray, A::AbstractArray{Bool,N})), N->gen_reduction_body(N, |))) -any!(r::AbstractArray, A::AbstractArray{Bool}; init::Bool=true) = _any!(initarray!(r, false, init), A) -any(A::AbstractArray{Bool}, region) = _any!(reduction_init(A, region, false), A) +for (fname, fbase, Fun) in [(:sumabs, :sum, :AbsFun), + (:sumabs2, :sum, :Abs2Fun), + (:maxabs, :maximum, :AbsFun), + (:minabs, :minimum, :AbsFun)] + fname! = symbol(string(fname, '!')) + fbase! = symbol(string(fbase, '!')) + @eval begin + $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = + $(fbase!)($(Fun)(), r, A; init=init) + $(fname)(A::AbstractArray, region) = $(fbase)($(Fun)(), A, region) + end +end -## findmin & findmax +##### findmin & findmax ##### # Generate the body for a reduction function reduce!(f, Rval, Rind, A), using a comparison operator f # Rind contains the index of A from which Rval was taken @@ -272,10 +287,12 @@ eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmin!{T,N}(Rval::AbstractArray, findmin!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmin!(initarray!(rval, typemax(R), init), rind, A) findmin{T}(A::AbstractArray{T}, region) = isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) : - _findmin!(reduction_init(A, region, typemax(T)), zeros(Int,reduced_dims0(A,region)), A) + _findmin!(reducedim_initarray0(A, region, typemax(T)), zeros(Int,reduced_dims0(A,region)), A) eval(ngenerate(:N, :(typeof((Rval,Rind))), :(_findmax!{T,N}(Rval::AbstractArray, Rind::AbstractArray, A::AbstractArray{T,N})), N->gen_findreduction_body(N, >))) findmax!{R}(rval::AbstractArray{R}, rind::AbstractArray, A::AbstractArray; init::Bool=true) = _findmax!(initarray!(rval, typemin(R), init), rind, A) findmax{T}(A::AbstractArray{T}, region) = isempty(A) ? (similar(A,reduced_dims0(A,region)), zeros(Int,reduced_dims0(A,region))) : - _findmax!(reduction_init(A, region, typemin(T)), zeros(Int,reduced_dims0(A,region)), A) + _findmax!(reducedim_initarray0(A, region, typemin(T)), zeros(Int,reduced_dims0(A,region)), A) + + diff --git a/test/reducedim.jl b/test/reducedim.jl index 2d35070c35fb7..6f8b9b18b004a 100644 --- a/test/reducedim.jl +++ b/test/reducedim.jl @@ -13,9 +13,9 @@ safe_sumabs2{T}(A::Array{T}, region) = safe_mapslices(sum, abs2(A), region) Areduc = rand(3, 4, 5, 6) for region in { - 1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), + 1, 2, 3, 4, 5, (1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4), (1, 2, 3), (1, 3, 4), (2, 3, 4), (1, 2, 3, 4)} - + # println("region = $region") r = fill(NaN, Base.reduced_dims(size(Areduc), region)) @test_approx_eq sum!(r, Areduc) safe_sum(Areduc, region) @test_approx_eq prod!(r, Areduc) safe_prod(Areduc, region) From 34410d11ef8b012856ac106e95b323fc7e8c7880 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Wed, 4 Jun 2014 09:59:46 -0500 Subject: [PATCH 2/4] use ngenerate for mapreducedim --- base/reducedim.jl | 79 +++++++++++++++++++---------------------------- 1 file changed, 31 insertions(+), 48 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 807e153f6cc5a..0a9fe46067561 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -132,60 +132,43 @@ function check_reducdims(R, A) return lsiz end -function mapreducedim!_func(N::Int) - @eval begin - local _F_ - function _F_(f, op, R, A) - lsiz = check_reducdims(R, A) - isempty(A) && return R - @nextract $N sizeR d->size(R,d) - sizA1 = size(A, 1) - - if has_fast_linear_indexing(A) && lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - @inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz) - ibase += lsiz - end - elseif size(R, 1) == 1 && sizA1 > 1 - # keep the accumulator as a local variable when reducing along the first dimension - @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds r = (@nref $N R j) - for i_1 = 1:sizA1 - @inbounds v = evaluate(f, (@nref $N A i)) - r = evaluate(op, r, v) - end - @inbounds (@nref $N R j) = r - end - else - # general implementation - @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin +function mapreducedim_body(N::Int) + quote + lsiz = check_reducdims(R, A) + isempty(A) && return R + @nextract $N sizeR d->size(R,d) + sizA1 = size(A, 1) + + if has_fast_linear_indexing(A) && lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz) + ibase += lsiz + end + elseif size(R, 1) == 1 && sizA1 > 1 + # keep the accumulator as a local variable when reducing along the first dimension + @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds r = (@nref $N R j) + for i_1 = 1:sizA1 @inbounds v = evaluate(f, (@nref $N A i)) - @inbounds (@nref $N R j) = evaluate(op, (@nref $N R j), v) + r = evaluate(op, r, v) end + @inbounds (@nref $N R j) = r + end + else + # general implementation + @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds v = evaluate(f, (@nref $N A i)) + @inbounds (@nref $N R j) = evaluate(op, (@nref $N R j), v) end - return R end - _F_ - end # @eval -end - -let mapreducedim_fcache = (Int=>Function)[] -global _mapreducedim! -function _mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) - isempty(R) && return R - ndimsA = ndims(A) - if !haskey(mapreducedim_fcache, ndimsA) - func! = mapreducedim!_func(ndimsA) - mapreducedim_fcache[ndimsA] = func! - else - func! = mapreducedim_fcache[ndimsA] + return R end - func!(f, op, R, A)::typeof(R) end -end # let mapreducedim_fcache +eval(ngenerate(:N, :(typeof(R)), + :(_mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N})), mapreducedim_body)) mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = _mapreducedim!(f, op, R, A) From fe35eeee2f500f0f65ea0ee7fe526292f6b08157 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Wed, 4 Jun 2014 10:18:11 -0500 Subject: [PATCH 3/4] specialized methods to make the initialization for reducedim more efficient for common cases --- base/reducedim.jl | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/base/reducedim.jl b/base/reducedim.jl index 0a9fe46067561..59ce195fe343f 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -95,6 +95,21 @@ reducedim_init{T}(f::Union(AbsFun,Abs2Fun), op::MaxFun, A::AbstractArray{T}, reg reducedim_init(f, op::AndFun, A::AbstractArray, region) = reducedim_initarray(A, region, true) reducedim_init(f, op::OrFun, A::AbstractArray, region) = reducedim_initarray(A, region, false) +# specialize to make initialization more efficient for common cases + +typealias CommonReduceResult Union(Uint64,Uint128,Int64,Int128,Float32,Float64,Complex64,Complex128) + +for (IT, RT) in ((:CommonReduceResult, :T), (:SmallSigned, :Int), (:SmallUnsigned, :Uint)) + @eval begin + reducedim_init{T<:$IT}(f::Union(IdFun,AbsFun,Abs2Fun), op::AddFun, A::AbstractArray{T}, region) = + reducedim_initarray(A, region, zero($RT)) + reducedim_init{T<:$IT}(f::Union(IdFun,AbsFun,Abs2Fun), op::MulFun, A::AbstractArray{T}, region) = + reducedim_initarray(A, region, one($RT)) + end +end +reducedim_init(f::Union(IdFun,AbsFun,Abs2Fun), op::AddFun, A::AbstractArray{Bool}, region) = + reducedim_initarray(A, region, 0) + ## generic (map)reduction From 99193d97a2e18f02f7f6212ad9ac9348b7959ad1 Mon Sep 17 00:00:00 2001 From: Dahua Lin Date: Wed, 4 Jun 2014 11:38:03 -0500 Subject: [PATCH 4/4] use `@ngenerate` macro for mapreducedim! --- base/reducedim.jl | 62 ++++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/base/reducedim.jl b/base/reducedim.jl index 59ce195fe343f..bc9450fc5d560 100644 --- a/base/reducedim.jl +++ b/base/reducedim.jl @@ -147,43 +147,39 @@ function check_reducdims(R, A) return lsiz end -function mapreducedim_body(N::Int) - quote - lsiz = check_reducdims(R, A) - isempty(A) && return R - @nextract $N sizeR d->size(R,d) - sizA1 = size(A, 1) - - if has_fast_linear_indexing(A) && lsiz > 16 - # use mapreduce_impl, which is probably better tuned to achieve higher performance - nslices = div(length(A), lsiz) - ibase = 0 - for i = 1:nslices - @inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz) - ibase += lsiz - end - elseif size(R, 1) == 1 && sizA1 > 1 - # keep the accumulator as a local variable when reducing along the first dimension - @nloops $N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds r = (@nref $N R j) - for i_1 = 1:sizA1 - @inbounds v = evaluate(f, (@nref $N A i)) - r = evaluate(op, r, v) - end - @inbounds (@nref $N R j) = r - end - else - # general implementation - @nloops $N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin - @inbounds v = evaluate(f, (@nref $N A i)) - @inbounds (@nref $N R j) = evaluate(op, (@nref $N R j), v) +@ngenerate N typeof(R) function _mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N}) + lsiz = check_reducdims(R, A) + isempty(A) && return R + @nextract N sizeR d->size(R,d) + sizA1 = size(A, 1) + + if has_fast_linear_indexing(A) && lsiz > 16 + # use mapreduce_impl, which is probably better tuned to achieve higher performance + nslices = div(length(A), lsiz) + ibase = 0 + for i = 1:nslices + @inbounds R[i] = mapreduce_impl(f, op, A, ibase+1, ibase+lsiz) + ibase += lsiz + end + elseif size(R, 1) == 1 && sizA1 > 1 + # keep the accumulator as a local variable when reducing along the first dimension + @nloops N i d->(d>1? (1:size(A,d)) : (1:1)) d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds r = (@nref N R j) + for i_1 = 1:sizA1 + @inbounds v = evaluate(f, (@nref N A i)) + r = evaluate(op, r, v) end + @inbounds (@nref N R j) = r + end + else + # general implementation + @nloops N i A d->(j_d = sizeR_d==1 ? 1 : i_d) begin + @inbounds v = evaluate(f, (@nref N A i)) + @inbounds (@nref N R j) = evaluate(op, (@nref N R j), v) end - return R end + return R end -eval(ngenerate(:N, :(typeof(R)), - :(_mapreducedim!{T,N}(f, op, R::AbstractArray, A::AbstractArray{T,N})), mapreducedim_body)) mapreducedim!(f, op, R::AbstractArray, A::AbstractArray) = _mapreducedim!(f, op, R, A)