Remove StructBox for Value Types #549

manofstick · 2015-07-22T05:07:20Z

The StructBox idiom is used for countBy/groupBy/dict so that reference types with null values can work.

For Value Types this is just an extra layer of indirection which adds a performance cost without any benefit. This change request adds checks on the keys type, and then calls a customised value or reference type version.

This is related to #513, but can be considered a worthwhile optimization on its own.

The StructBox makes code that contains "hard" tail calls, which means that performance suffers under the 64 bit JIT

There must be some other way to check if a type is a Value Type? I doubt if it has been removed?

msftclas · 2015-07-22T05:07:24Z

Hi @manofstick, I'm your friendly neighborhood Microsoft Pull Request Bot (You can call me MSBOT). Thanks for your contribution!
You've already signed the contribution license agreement. Thanks!

The agreement was validated by Microsoft and real humans are currently evaluating your PR.

TTYL, MSBOT;

manofstick · 2015-07-22T05:38:44Z

The following are some results from a timing test listed below. Note that the times for the 64-bit JIT are awful, even after this change (albeit a fair bit better). This is due to the 'hard' tail call optimizations. Removing the StructBox removed a layer of function calls that emitted tails, but there are still other performance layers to tear away (i.e. #513)

(not sure if the broken build is my fault - I do need to check this [on my laptop on the tram doesn't make for good full builds], but the results from appveyor seem to not have done anything)

Test	x86_old	x86_new	x64_old	x64_new	x86_new/old	x64_new/old
seqGroupBy	1707	1677	4188	2980	98.24%	71.16%
seqCountBy	2280	2032	6976	4297	89.12%	61.60%
listGroupBy	1230	1092	3538	2283	88.78%	64.53%
listCountBy	1822	1554	6466	3783	85.29%	58.51%
arrayCountBy	1624	1350	6212	3589	83.13%	57.78%
arrayGroupBy	931	774	3230	1887	83.14%	58.42%
dict	848	780	1995	1336	91.98%	66.97%

The code

module Program

open System.Diagnostics

/// Single-field struct key (value type) for the grouping benchmarks.
[<Struct>]
type Key1 =
    val A : int
    new (a: int) = { A = a }

/// Two-field struct key (value type) for the grouping benchmarks.
[<Struct>]
type Key2 =
    val A : int
    val B : int
    new (a: int, b: int) = { A = a; B = b }

/// Three-field struct key (value type) for the grouping benchmarks.
[<Struct>]
type Key3 =
    val A : int
    val B : int
    val C : int
    new (a: int, b: int, c: int) = { A = a; B = b; C = c }

/// Four-field struct key (value type) for the grouping benchmarks.
[<Struct>]
type Key4 =
    val A : int
    val B : int
    val C : int
    val D : int
    new (a: int, b: int, c: int, d: int) = { A = a; B = b; C = c; D = d }

/// Five-field struct key (value type) for the grouping benchmarks.
[<Struct>]
type Key5 =
    val A : int
    val B : int
    val C : int
    val D : int
    val E : int
    new (a: int, b: int, c: int, d: int, e: int) =
        { A = a; B = b; C = c; D = d; E = e }

/// Applies the keyed sequence operation `f` (e.g. Seq.groupBy) with
/// `createKey` to the sequence 0 .. n-1 and returns the number of
/// resulting items together with the elapsed milliseconds.
let seqTest f n createKey =
    let timer = Stopwatch.StartNew ()
    let source = Seq.init n id
    let result = f createKey source
    // Seq.length forces the sequence, so the work happens inside the timed region.
    let count = Seq.length result
    count, timer.ElapsedMilliseconds

/// Applies the keyed list operation `f` (e.g. List.groupBy) with
/// `createKey` to the list [0 .. n-1] and returns the number of
/// resulting items together with the elapsed milliseconds.
let listTest f n createKey =
    let timer = Stopwatch.StartNew ()
    let source = List.init n id
    let result = f createKey source
    let count = List.length result
    count, timer.ElapsedMilliseconds

/// Applies the keyed array operation `f` (e.g. Array.groupBy) with
/// `createKey` to the array [|0 .. n-1|] and returns the number of
/// resulting items together with the elapsed milliseconds.
let arrayTest f n createKey =
    let timer = Stopwatch.StartNew ()
    let source = Array.init n id
    let result = f createKey source
    let count = Array.length result
    count, timer.ElapsedMilliseconds

/// Builds a dictionary from the pairs (createKey i, i) for i in 0 .. n-1
/// via `dict` and returns its entry count together with the elapsed
/// milliseconds.
let dictTest n createKey =
    let timer = Stopwatch.StartNew ()
    let pairs = Array.init n (fun i -> createKey i, i)
    let table = dict pairs
    let count = table.Count
    count, timer.ElapsedMilliseconds

// Key factories for the groupBy/countBy benchmarks: each field is n modulo a
// small constant, so many inputs map to the same key.
let createKey2 (n: int) = Key2 (n % 11, n % 7)
let createKey3 (n: int) = Key3 (n % 11, n % 7, n % 5)
let createKey4 (n: int) = Key4 (n % 11, n % 7, n % 5, n % 3)
let createKey5 (n: int) = Key5 (n % 11, n % 7, n % 5, n % 3, n % 2)

// Key factories for the dict benchmark: every field is n itself.
let createDictKey2 (n: int) = Key2 (n, n)
let createDictKey3 (n: int) = Key3 (n, n, n)
let createDictKey4 (n: int) = Key4 (n, n, n, n)
let createDictKey5 (n: int) = Key5 (n, n, n, n, n)

/// Runs five rounds of Seq.groupBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runSeqGroupByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = seqTest Seq.groupBy elementCount createKey2
        let n3, ms3 = seqTest Seq.groupBy elementCount createKey3
        let n4, ms4 = seqTest Seq.groupBy elementCount createKey4
        let n5, ms5 = seqTest Seq.groupBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "seqGroupBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "seqGroupBy totalTime=%d" elapsedTotal

/// Runs five rounds of Seq.countBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runSeqCountByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = seqTest Seq.countBy elementCount createKey2
        let n3, ms3 = seqTest Seq.countBy elementCount createKey3
        let n4, ms4 = seqTest Seq.countBy elementCount createKey4
        let n5, ms5 = seqTest Seq.countBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "seqCountBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "seqCountBy totalTime=%d" elapsedTotal

/// Runs five rounds of List.groupBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runListGroupByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = listTest List.groupBy elementCount createKey2
        let n3, ms3 = listTest List.groupBy elementCount createKey3
        let n4, ms4 = listTest List.groupBy elementCount createKey4
        let n5, ms5 = listTest List.groupBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "listGroupBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "listGroupBy totalTime=%d" elapsedTotal

/// Runs five rounds of List.countBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runListCountByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = listTest List.countBy elementCount createKey2
        let n3, ms3 = listTest List.countBy elementCount createKey3
        let n4, ms4 = listTest List.countBy elementCount createKey4
        let n5, ms5 = listTest List.countBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "listCountBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "listCountBy totalTime=%d" elapsedTotal

/// Runs five rounds of Array.groupBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runArrayGroupByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = arrayTest Array.groupBy elementCount createKey2
        let n3, ms3 = arrayTest Array.groupBy elementCount createKey3
        let n4, ms4 = arrayTest Array.groupBy elementCount createKey4
        let n5, ms5 = arrayTest Array.groupBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "arrayGroupBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "arrayGroupBy totalTime=%d" elapsedTotal

/// Runs five rounds of Array.countBy over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and result counts,
/// then the accumulated time.
let runArrayCountByTest () =
    let elementCount = 100000
    let mutable elapsedTotal = 0L
    for _ in 1 .. 5 do
        let n2, ms2 = arrayTest Array.countBy elementCount createKey2
        let n3, ms3 = arrayTest Array.countBy elementCount createKey3
        let n4, ms4 = arrayTest Array.countBy elementCount createKey4
        let n5, ms5 = arrayTest Array.countBy elementCount createKey5
        elapsedTotal <- elapsedTotal + ms2 + ms3 + ms4 + ms5
        printfn "arrayCountBy %d,%d,%d,%d (%d,%d,%d,%d)" ms2 ms3 ms4 ms5 n2 n3 n4 n5
    printfn "arrayCountBy totalTime=%d" elapsedTotal

/// Runs five rounds of the `dict` benchmark over 100000 elements for the 2- to
/// 5-field struct keys, printing each round's timings (ms) and entry counts,
/// then the accumulated time.
///
/// The output lines are labelled "dict" so they can be told apart from the
/// arrayCountBy benchmark output (the label was previously copy-pasted).
let runDictTest () =
    let size = 100000
    let mutable totalTime = 0L
    for _ in 1 .. 5 do
        let c2, t2 = dictTest size createDictKey2
        let c3, t3 = dictTest size createDictKey3
        let c4, t4 = dictTest size createDictKey4
        let c5, t5 = dictTest size createDictKey5
        totalTime <- totalTime + t2 + t3 + t4 + t5
        printfn "dict %d,%d,%d,%d (%d,%d,%d,%d)" t2 t3 t4 t5 c2 c3 c4 c5
    printfn "dict totalTime=%d" totalTime


/// Entry point: runs every benchmark once, in a fixed order, and exits with 0.
[<EntryPoint>]
let main _ =
    //printfn "%s" Id.Name
    let benchmarks =
        [ runSeqGroupByTest
          runSeqCountByTest
          runListGroupByTest
          runListCountByTest
          runArrayCountByTest
          runArrayGroupByTest
          runDictTest ]
    for benchmark in benchmarks do
        benchmark ()
    0

I don't think this is a good way to structure exceptions, but it's to match current functionality

dsyme · 2015-07-22T11:07:31Z

Impressive. I haven't done a detailed code review yet (others - please help) but there's a lot of goodness here.

Which specific tailcalls are causing the 64-bit slowdown? (give links to specific lines?) We should likely add a general .NET perf bug about that, and also consider if there is a systematic way to eliminate these in the F# compiler itself rather than manually.

manofstick · 2015-08-02T01:26:26Z

@dsyme

Here is a cut down version which replicates the issue (could probably be a bit more minimal, but it's not too long...)

open System
open System.Diagnostics
open System.Collections.Generic

/// Cut-down copy of the StructBox wrapper, used to reproduce the tail-call
/// slowdown: a struct holding a single value plus an equality comparer
/// for boxed values.
[<Struct>]
type StructBox<'T when 'T : equality>(value:'T) = 
    /// The wrapped value.
    member x.Value = value
    /// Comparer over StructBox<'T>; only GetHashCode is implemented here,
    /// Equals always fails with "not implemented".
    static member Comparer =
        { new IEqualityComparer<StructBox<'T>> with 
                member __.GetHashCode(v) =
// With MAKE_IT_FAST defined, "0 +" is prepended so that the GetHashCode call
// below is no longer the last operation of the method; per the surrounding
// discussion this stops the F# compiler from emitting a tail call there.
#if MAKE_IT_FAST
                    0 +
#endif
                     EqualityComparer<'T>.Default.GetHashCode(v.Value)
                member __.Equals(v1,v2) = failwith "not implemented" }

/// Generic two-field struct used as the value wrapped by StructBox in the repro.
[<Struct>]
type StructTuple<'a, 'b> =
    val A : 'a
    val B : 'b
    new (a: 'a, b: 'b) = { A = a; B = b }

/// Hot loop of the repro: calls StructBox.Comparer.GetHashCode on `a` for
/// every i from 0 to 10000000 (inclusive) and returns the sum of the results.
/// The call shape is deliberately left as-is, since it is what is being timed.
let countlots<'a when 'a : equality>(a : StructBox<'a>) =
    let mutable sum = 0
    for i=0 to 10000000 do
        // NOTE(review): Comparer is a plain static property, so it is presumably
        // re-evaluated on every access — confirm before hoisting it out of the loop.
        sum <- sum + StructBox.Comparer.GetHashCode(a)
    sum

/// Times one call to `countlots a` and prints the elapsed milliseconds followed
/// by the returned sum in parentheses.
let inline time_it<'a when 'a : equality> (a:StructBox<'a>) =
    let timer = Stopwatch.StartNew ()
    let total = countlots a
    printfn "%d (%d)" timer.ElapsedMilliseconds total

/// Entry point of the repro: times the hash loop for an (int, bool) key and
/// then for an (int64, bool) key, and exits with 0.
[<EntryPoint>]
let main argv =
    let intKey = StructBox (StructTuple(1, false))
    let longKey = StructBox (StructTuple(1L, false))
    time_it intKey
    time_it longKey
    0

So build this with the MAKE_IT_FAST compilation symbol, and the F# compiler won't generate a tail instruction, and the performance will be basically equal for the two versions; otherwise the one with the tail call will be ~10x slower.

(I have just exchanged the HashIdentity.Structural to EqualityComparer.Default just to simplify what is going on)

I don't think this is a JIT "issue" per se - i.e. I think it is just a natural consequence of the x64 calling convention, although I haven't actually worked through the underlying assembly myself, I'm just relying on the overall gist of this and this.

manofstick · 2015-08-02T08:22:11Z

(Oh, and even without the tailcall issue, the change shows a 10-15% time decrease in the test suite on the x86 (32-bit) version, which I think is still enough of an improvement to warrant the change.)

latkin · 2015-08-11T01:33:29Z

src/fsharp/FSharp.Core/array.fs


            // Build the groupings
            for v in array do
-                let key = Microsoft.FSharp.Core.CompilerServices.RuntimeHelpers.StructBox (projection v)
+                let key = projection v


For these guys I would consider naming the value safeKey, to match the given type name. As it stands, the value key does not have type 'Key, nor could it be returned by getKey...

Similarly throughout.

manofstick · 2015-08-11T05:22:21Z

@latkin,

I'm happy to use "ResizeArray ()", and that is actually what I originally did, but the "problem" I found was that that decreased the performance of the use case where single allocations were occurring when there wasn't a second use of initialBucketSize to limit the call to TrimExcess().

That was why my comment/rant re: stingy allocation due to all the other baggage that is allocated.

Personally I use groupBy a lot, and I can't recall the last time I used it to bucket things into single items. (And I assume my usage is not that uncommon.)

So... How about we keep the restriction on the call to TrimExcess, and use the default constructor for ResizeArray ()? (I'm happy not to have the restriction on TrimExcess and use the default constructor, but it does punish people performance wise if they are using the single item case... or two or three items... Really the restriction should be there...)

Does this make sense? I'm sure this would be easier to explain over a beer... (Maybe I'll just have a few and rewrite the message...)

latkin · 2015-08-11T16:22:24Z

That sounds good to me - just use defaults for the resizearrays themselves, but maintain the new limitation on TrimExcess calls.

As per #549 (diff)

Initially this had been set to 1, I had changed it to 4, but after discussion it was decided that the default is probably the correct choice. As per #549 (comment)

manofstick · 2015-08-12T19:52:40Z

@latkin,

Done, and done.

latkin · 2015-08-12T19:59:11Z

Oh geez, i was just sending you a PR with those, plus an adjustment to the IsStructType check so that it works for all profiles.

I'll just add it myself then merge, unless you have an objection.

manofstick · 2015-08-12T22:02:24Z

SNAP! :-)

no worries; do as you see fit, I am but your humble servant!

latkin · 2015-08-13T01:04:02Z

@manofstick using your test code, I get below results on my 2 machines. On my slow box I see very minor (but consistent) improvement. On my fast box I see much more significant improvement, in line with your results.

Machine 1

x86

Test	old	new	new result
seqGroupBy	1162	1157	93.49%
seqCountBy	1473	1475	96.59%
listGroupBy	773	750	99.58%
listCountBy	1149	1152	98.87%
arrayCountBy	1003	1020	97.30%
arrayGroupBy	588	574	98.94%
dict	601	591	96.44%

x64 RyuJIT

Test	old	new	new result
seqGroupBy	1753	1780	101.54%
seqCountBy	2726	2709	99.38%
listGroupBy	1434	1448	100.98%
listCountBy	2481	2446	98.59%
arrayCountBy	2258	2261	100.13%
arrayGroupBy	1220	1217	99.75%
dict	978	957	97.85%

x64 legacy JIT

Test	old	new	new result
seqGroupBy	1941	1802	92.84%
seqCountBy	2925	2743	93.78%
listGroupBy	1511	1572	104.04%
listCountBy	2513	2535	100.88%
arrayCountBy	2340	2336	99.83%
arrayGroupBy	1279	1282	100.23%
dict	967	974	100.72%

Machine 2

x86

Test	old	new	new result
seqGroupBy	1131	1030	91.07%
seqCountBy	1551	1282	82.66%
listGroupBy	749	664	88.65%
listCountBy	1175	1014	86.30%
arrayCountBy	1056	877	83.05%
arrayGroupBy	594	493	83.00%
dict	596	534	89.60%

x64 RyuJIT

Test	old	new	new result
seqGroupBy	2504	1578	63.02%
seqCountBy	4114	2385	57.97%
listGroupBy	2275	1306	57.41%
listCountBy	3968	2257	56.88%
arrayCountBy	3779	2246	59.43%
arrayGroupBy	1971	1176	59.67%
dict	1417	983	69.37%

x64 legacy JIT

Test	old	new	new result
seqGroupBy	2345	1670	71.22%
seqCountBy	3943	2496	63.30%
listGroupBy	2123	1386	65.28%
listCountBy	3766	2284	60.65%
arrayCountBy	3886	2091	53.81%
arrayGroupBy	2041	1140	55.85%
dict	1416	917	64.76%

The StructBox makes code that contains "hard" tail calls, which means that performance suffers under the 64 bit JIT closes #549 commit 36f10b6214d8b73140b481e391f7999b9b8be8a3 Author: latkin <[email protected]> Date: Wed Aug 12 12:40:46 2015 -0700 Proper ref/val type checking for all portable profiles commit 037a5e1 Author: Paul Westcott <[email protected]> Date: Thu Aug 13 05:50:29 2015 +1000 Using default constructor for ResizeArray Initially this had been set to 1, I had changed it to 4, but after discussion it was decided that the default is probably the correct choice. As per #549 (comment) commit 3796a55 Author: Paul Westcott <[email protected]> Date: Thu Aug 13 05:45:38 2015 +1000 Renamed key to safeKey where appropriate As per #549 (diff) commit b7884f8 Author: Paul Westcott <[email protected]> Date: Wed Jul 22 17:12:30 2015 +1000 Restored null arg exception as lazy I don't think this is a good way to structure exceptions, but it's to match current functionality commit 23cc156 Author: Paul Westcott <[email protected]> Date: Wed Jul 22 05:53:33 2015 +1000 Split dict by ValueType/RefType commit d4b6861 Author: Paul Westcott <[email protected]> Date: Wed Jul 22 05:10:39 2015 +1000 Split Array.countBy/groupBy by ValueType/RefType commit 02e6d42 Author: Paul Westcott <[email protected]> Date: Wed Jul 22 04:55:42 2015 +1000 Split List.groupBy for ValueType/RefType commit d80e616 Author: Paul Westcott <[email protected]> Date: Wed Jul 22 04:43:54 2015 +1000 Split List.countBy by RefType/ValueType commit 202e12e Author: Paul Westcott <[email protected]> Date: Wed Jul 22 04:27:45 2015 +1000 "Fixing" Reflection issues with Profile builds There must be some other way to check if a type is a Value Type? I doubt if it has been removed? 
commit c06d8e6 Author: Paul Westcott <[email protected]> Date: Tue Jul 21 16:07:33 2015 +1000 Split Seq.countBy for ValueType/RefType commit 1c5ce38 Author: Paul Westcott <[email protected]> Date: Tue Jul 21 15:42:25 2015 +1000 Split Seq.groupBy for ValueType/RefType The StructBox makes code that contains "hard" tail calls, which means that performance suffers under the 64 bit JIT

latkin · 2015-08-13T01:07:12Z

Applied to OOB branch

Paul Westcott added 7 commits July 21, 2015 15:42

Split Seq.groupBy for ValueType/RefType

1c5ce38

The StructBox makes code that contains "hard" tail calls, which means that performance suffers under the 64 bit JIT

Split Seq.countBy for ValueType/RefType

c06d8e6

"Fixing" Reflection issues with Profile builds

202e12e

There must be some other way to check if a type is a Value Type? I doubt if it has been removed?

Split List.countBy by RefType/ValueType

d80e616

Split List.groupBy for ValueType/RefType

02e6d42

Split Array.countBy/groupBy by ValueType/RefType

d4b6861

Split dict by ValueType/RefType

23cc156

msftclas added the cla-already-signed label Jul 22, 2015

Restored null arg exception as lazy

b7884f8

I don't think this is a good way to structure exceptions, but it's to match current functionality

manofstick mentioned this pull request Jul 22, 2015

Optimization: Generic specialization for equality #513

Closed

manofstick mentioned this pull request Aug 1, 2015

Extraneous JIT_TailCall generated #533

Closed

latkin added this to the F# 4.0 Update 1 milestone Aug 4, 2015

jack-pappas mentioned this pull request Aug 11, 2015

Streamlining and minor cleanup of code in the PrettyNaming module #578

Closed

latkin reviewed Aug 11, 2015
View reviewed changes

Paul Westcott added 2 commits August 13, 2015 05:45

Renamed key to safeKey where appropriate

3796a55

As per #549 (diff)

Using default constructor for ResizeArray

037a5e1

Initially this had been set to 1, I had changed it to 4, but after discussion it was decided that the default is probably the correct choice. As per #549 (comment)

latkin closed this Aug 13, 2015

latkin added the fixed label Aug 13, 2015

manofstick mentioned this pull request Aug 17, 2015

Removing internal interface usage, and tail calls in "dict" #594

Closed

manofstick mentioned this pull request Jan 18, 2016

It's review time! #877

Closed

manofstick mentioned this pull request Feb 4, 2016

[WIP] Non-boxing equality for enums (in generic contexts) #930

Closed

manofstick mentioned this pull request Jun 9, 2018

[CompilerPerf] Faster equality in generic contexts #5112

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Remove StructBox for Value Types #549

Remove StructBox for Value Types #549

manofstick commented Jul 22, 2015

msftclas commented Jul 22, 2015

manofstick commented Jul 22, 2015

dsyme commented Jul 22, 2015

manofstick commented Aug 2, 2015

manofstick commented Aug 2, 2015

latkin Aug 11, 2015

manofstick commented Aug 11, 2015

latkin commented Aug 11, 2015

manofstick commented Aug 12, 2015

latkin commented Aug 12, 2015

manofstick commented Aug 12, 2015

latkin commented Aug 13, 2015

latkin commented Aug 13, 2015

Remove StructBox for Value Types #549

Remove StructBox for Value Types #549

Conversation

manofstick commented Jul 22, 2015

msftclas commented Jul 22, 2015

manofstick commented Jul 22, 2015

dsyme commented Jul 22, 2015

manofstick commented Aug 2, 2015

manofstick commented Aug 2, 2015

latkin Aug 11, 2015

Choose a reason for hiding this comment

manofstick commented Aug 11, 2015

latkin commented Aug 11, 2015

manofstick commented Aug 12, 2015

latkin commented Aug 12, 2015

manofstick commented Aug 12, 2015

latkin commented Aug 13, 2015

latkin commented Aug 13, 2015