Skip to content

Commit

Permalink
Split Array.countBy/groupBy by ValueType/RefType
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Westcott committed Jul 21, 2015
1 parent 02e6d42 commit d4b6861
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 19 deletions.
70 changes: 53 additions & 17 deletions src/fsharp/FSharp.Core/array.fs
Original file line number Diff line number Diff line change
Expand Up @@ -172,24 +172,39 @@ namespace Microsoft.FSharp.Collections

Microsoft.FSharp.Primitives.Basics.Array.subUnchecked 0 count array

[<CompiledName("CountBy")>]
let countBy projection (array:'T[]) =
checkNonNull "array" array
let dict = new Dictionary<Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>,int>(Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer)
// Shared worker behind countBy.
//   comparer   - equality comparer used by the internal Dictionary for keys of type 'SafeKey
//   projection - maps each element of `array` to the key that is stored in the Dictionary
//   getKey     - converts a stored Dictionary key back to the 'Key placed in the result
//   array      - the input elements
// Returns an array of (key, count) pairs, one per distinct Dictionary key, filled in the
// order in which the Dictionary enumerates its entries.
//
// NOTE(review): this listing appears to be a rendered diff whose +/- markers and indentation
// were stripped, so some adjacent statements below look like the removed and the added version
// of the same line (the two `let key` bindings and the two `res.[i]` assignments) - confirm
// against the repository before relying on the exact statement sequence.
let countByImpl (comparer:IEqualityComparer<'SafeKey>) (projection:'T->'SafeKey) (getKey:'SafeKey->'Key) (array:'T[]) =
let dict = Dictionary comparer

// Tally the number of elements seen for each key
for v in array do
let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection v)
let key = projection v
// `prev` receives the current count through the byref when the key is already present
let mutable prev = Unchecked.defaultof<_>
if dict.TryGetValue(key, &prev) then dict.[key] <- prev + 1 else dict.[key] <- 1

// One result slot per distinct key
let res = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
let mutable i = 0
for group in dict do
res.[i] <- group.Key.Value, group.Value
res.[i] <- getKey group.Key, group.Value
i <- i + 1
res

// countBy path that stores keys directly in the Dictionary: keys are compared with
// HashIdentity.Structural and `id` is used to recover the key for the result.
// We avoid wrapping keys in a StructBox here, because under the 64-bit JIT we get some "hard"
// tailcalls which affect performance
let countByValueType (projection:'T->'Key) (array:'T[]) = countByImpl HashIdentity.Structural<'Key> projection id array

// countBy path that wraps every key in a StructBox: the projection result is boxed into a
// StructBox before being used as a Dictionary key, keys are compared with StructBox<'Key>.Comparer,
// and the original key is recovered through `.Value` when the result is built.
// Wrap a StructBox around all keys in case the key type is itself a type using null as a representation
let countByRefType (projection:'T->'Key) (array:'T[]) = countByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection t)) (fun sb -> sb.Value) array

// Entry point for counting the elements of `array` per key produced by `projection`.
// `checkNonNull` is applied to `array` before any work is done (helper defined elsewhere;
// presumably raises on a null array - confirm).
// When FX_ATLEAST_40 is defined the key type is inspected at run time: value-type keys go
// through countByValueType (no StructBox), every other key type goes through countByRefType.
// Without FX_ATLEAST_40 the StructBox-based countByRefType path is always used.
[<CompiledName("CountBy")>]
let countBy (projection:'T->'Key) (array:'T[]) =
checkNonNull "array" array
#if FX_ATLEAST_40
if typeof<'Key>.IsValueType
then countByValueType projection array
else countByRefType projection array
#else
countByRefType projection array
#endif

[<CompiledName("Append")>]
let append (array1:'T[]) (array2:'T[]) =
checkNonNull "array1" array1
Expand Down Expand Up @@ -408,32 +423,53 @@ namespace Microsoft.FSharp.Collections
let rec loop i = i >= len1 || (f.Invoke(array1.[i], array2.[i]) && loop (i+1))
loop 0

[<CompiledName("GroupBy")>]
let groupBy keyf (array: 'T[]) =
checkNonNull "array" array
let dict = new Dictionary<RuntimeHelpers.StructBox<'Key>,ResizeArray<'T>>(RuntimeHelpers.StructBox<'Key>.Comparer)
// Shared worker behind groupBy.
//   comparer - equality comparer used by the internal Dictionary for keys of type 'SafeKey
//   keyf     - maps each element of `array` to the key that is stored in the Dictionary
//   getKey   - converts a stored Dictionary key back to the 'Key placed in the result
//   array    - the input elements
// Returns an array of (key, elements) pairs, one per distinct Dictionary key, filled in the
// order in which the Dictionary enumerates its entries; each group is copied out of its
// ResizeArray with ToArray().
//
// NOTE(review): this listing appears to be a rendered diff whose +/- markers and indentation
// were stripped, so some adjacent statements below look like the removed and the added version
// of the same line (the two `let key` bindings, the two TryGetValue forms, the two ResizeArray
// constructions, the `prev.Add(v)` / `prev.Add v` pairs and the two `result.[i]` assignments) -
// confirm against the repository before relying on the exact statement sequence.
let groupByImpl (comparer:IEqualityComparer<'SafeKey>) (keyf:'T->'SafeKey) (getKey:'SafeKey->'Key) (array: 'T[]) =
let dict = Dictionary<_,ResizeArray<_>> comparer

// Previously this was 1, but I think this is rather stingy, considering that we are already paying
// for at least a key, the ResizeArray reference, which includes an array reference, an Entry in the
// Dictionary, plus any empty space in the Dictionary of unfilled hash buckets. Having it larger means
// that we won't be having as many re-allocations. The ResizeArray is destroyed at the end anyway.
let initialBucketSize = 4

// Build the groupings
for i = 0 to (array.Length - 1) do
let v = array.[i]
let key = RuntimeHelpers.StructBox (keyf v)
let ok, prev = dict.TryGetValue(key)
if ok then
prev.Add(v)
let key = keyf v
// `prev` receives the existing bucket through the byref when the key is already present
let mutable prev = Unchecked.defaultof<_>
if dict.TryGetValue(key, &prev) then
prev.Add v
else
// First element for this key: create its bucket and register it before adding
let prev = new ResizeArray<'T>(1)
let prev = ResizeArray initialBucketSize
dict.[key] <- prev
prev.Add(v)
prev.Add v

// Return the array-of-arrays.
let result = Microsoft.FSharp.Primitives.Basics.Array.zeroCreateUnchecked dict.Count
let mutable i = 0
for group in dict do
result.[i] <- group.Key.Value, group.Value.ToArray()
result.[i] <- getKey group.Key, group.Value.ToArray()
i <- i + 1

result

// groupBy path that stores keys directly in the Dictionary: keys are compared with
// HashIdentity.Structural and `id` is used to recover the key for the result.
// We avoid wrapping keys in a StructBox here, because under the 64-bit JIT we get some "hard"
// tailcalls which affect performance
let groupByValueType (keyf:'T->'Key) (array:'T[]) = groupByImpl HashIdentity.Structural<'Key> keyf id array

// groupBy path that wraps every key in a StructBox: the result of `keyf` is boxed into a
// StructBox before being used as a Dictionary key, keys are compared with StructBox<'Key>.Comparer,
// and the original key is recovered through `.Value` when the result is built.
// Wrap a StructBox around all keys in case the key type is itself a type using null as a representation
let groupByRefType (keyf:'T->'Key) (array:'T[]) = groupByImpl Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox<'Key>.Comparer (fun t -> Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (keyf t)) (fun sb -> sb.Value) array

// Entry point for grouping the elements of `array` by the key produced by `keyf`.
// `checkNonNull` is applied to `array` before any work is done (helper defined elsewhere;
// presumably raises on a null array - confirm).
// When FX_ATLEAST_40 is defined the key type is inspected at run time: value-type keys go
// through groupByValueType (no StructBox), every other key type goes through groupByRefType.
// Without FX_ATLEAST_40 the StructBox-based groupByRefType path is always used.
[<CompiledName("GroupBy")>]
let groupBy (keyf:'T->'Key) (array:'T[]) =
checkNonNull "array" array
#if FX_ATLEAST_40
if typeof<'Key>.IsValueType
then groupByValueType keyf array
else groupByRefType keyf array
#else
groupByRefType keyf array
#endif

[<CompiledName("Pick")>]
let pick f (array: _[]) =
checkNonNull "array" array
Expand Down
4 changes: 2 additions & 2 deletions src/fsharp/FSharp.Core/list.fs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ namespace Microsoft.FSharp.Collections
then countByValueType projection list
else countByRefType projection list
#else
countByRefType projection source
countByRefType projection list
#endif

[<CompiledName("Map")>]
Expand Down Expand Up @@ -492,7 +492,7 @@ namespace Microsoft.FSharp.Collections
then groupByValueType keyf list
else groupByRefType keyf list
#else
groupByRefType keyf source
groupByRefType keyf list
#endif

[<CompiledName("Partition")>]
Expand Down

0 comments on commit d4b6861

Please sign in to comment.