From 36b7c0b8e28e78109594f74a4119ab0e1247bdd0 Mon Sep 17 00:00:00 2001 From: "dotnet-maestro[bot]" <42748379+dotnet-maestro[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 17:12:11 -0700 Subject: [PATCH 1/9] Update dependencies from https://github.com/dotnet/emsdk build 20230919.1 (#92306) Microsoft.NET.Workload.Emscripten.Current.Manifest-8.0.100.Transport From Version 8.0.0-rtm.23469.3 -> To Version 8.0.0-rc.2.23469.1 Co-authored-by: dotnet-maestro[bot] --- eng/Version.Details.xml | 4 ++-- eng/Versions.props | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 04567c6742f81..489044e0d1788 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -90,9 +90,9 @@ 89be445dd4936157533ad96bafb95f701430653a - + https://github.com/dotnet/emsdk - 50bf805c8b5ca52abd34fde390609d8a54640246 + ca3e8e40f417e421141638c66dd301395afee66d diff --git a/eng/Versions.props b/eng/Versions.props index 113383aab30cf..d56eeab17ef17 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -240,7 +240,7 @@ Note: when the name is updated, make sure to update dependency name in eng/pipelines/common/xplat-setup.yml like - DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-8_0_100_Transport --> - 8.0.0-rtm.23469.3 + 8.0.0-rc.2.23469.1 $(MicrosoftNETWorkloadEmscriptenCurrentManifest80100TransportVersion) 1.1.87-gba258badda From dcdc5f5269c2cb44c92a8ddf7917eaa30a9ca248 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 23:11:15 -0700 Subject: [PATCH 2/9] Fix options Validation with objects have indexers (#92311) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tarek Mahmoud Sayed Co-authored-by: Carlos Sánchez López <1175054+carlossanlop@users.noreply.github.com> --- .../src/DataAnnotationValidateOptions.cs | 3 +- .../OptionsRuntimeTests.cs | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs b/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs index e8f4b606882cf..5b734edf32738 100644 --- a/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs +++ b/src/libraries/Microsoft.Extensions.Options.DataAnnotations/src/DataAnnotationValidateOptions.cs @@ -95,7 +95,8 @@ private static bool TryValidateOptions(object options, string qualifiedName, Lis foreach (PropertyInfo propertyInfo in options.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public)) { - if (propertyInfo.GetMethod is null) + // Indexers are properties which take parameters. Ignore them. 
+ if (propertyInfo.GetMethod is null || propertyInfo.GetMethod.GetParameters().Length > 0) { continue; } diff --git a/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs b/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs index b644eea74120f..6109bccd29646 100644 --- a/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs +++ b/src/libraries/Microsoft.Extensions.Options/tests/SourceGeneration.Unit.Tests/OptionsRuntimeTests.cs @@ -177,6 +177,30 @@ public void TestValidationWithEnumeration() result2.Failures); } + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))] + public void TestObjectsWithIndexerProperties() + { + DataAnnotationValidateOptions<MyDictionaryOptions> dataAnnotationValidateOptions1 = new("MyDictionaryOptions"); + MyDictionaryOptionsOptionsValidator sourceGenOptionsValidator1 = new(); + + var options1 = new MyDictionaryOptions(); + ValidateOptionsResult result1 = sourceGenOptionsValidator1.Validate("MyDictionaryOptions", options1); + ValidateOptionsResult result2 = dataAnnotationValidateOptions1.Validate("MyDictionaryOptions", options1); + + Assert.True(result1.Succeeded); + Assert.True(result2.Succeeded); + + DataAnnotationValidateOptions<MyListOptions<string>> dataAnnotationValidateOptions2 = new("MyListOptions"); + MyListOptionsOptionsValidator sourceGenOptionsValidator2 = new(); + + var options2 = new MyListOptions<string>() { Prop = "test" }; + result1 = sourceGenOptionsValidator2.Validate("MyListOptions", options2); + result2 = dataAnnotationValidateOptions2.Validate("MyListOptions", options2); + + Assert.True(result1.Succeeded); + Assert.True(result2.Succeeded); + } + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBrowser))] public void TestValidationWithCyclicReferences() { @@ -302,6 +326,12 @@ public partial class MySourceGenOptionsValidator : IValidateOptions<MySourceGenOptions> { } + public class MyDictionaryOptions : Dictionary<string, string> { [Required] public string Prop { get; set; } = "test"; } + [OptionsValidator] public partial class MyDictionaryOptionsOptionsValidator : IValidateOptions<MyDictionaryOptions> { } + + public class MyListOptions<T> : List<T> { [Required] public T Prop { get; set; } = default; } + [OptionsValidator] public partial class MyListOptionsOptionsValidator : IValidateOptions<MyListOptions<string>> { } + #if NET8_0_OR_GREATER public class OptionsUsingNewAttributes { From f6489fe1d785747308433213b4558171c63eef30 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 23:12:03 -0700 Subject: [PATCH 3/9] JIT: add missing xarch RMW case (#92293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handle the case where we're indirectly updating a local with a value that is not a constant. Fixes #92218.
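As a rough illustration of the shape being handled (a hedged C++ analogy only, not the JIT's code - the actual managed repro is the Runtime_92218.cs test added below; `seed` and `ticks` are made-up stand-ins for any value the compiler cannot fold to a constant):

#include <cstdint>

volatile int64_t seed = 42;          // keeps the value out of reach of constant folding
int64_t ticks() { return seed; }     // hypothetical helper, not from the patch

int64_t rmw_local()
{
    int64_t total = 0;
    int64_t* p = &total;             // address of a stack local (the contained GT_LCL_ADDR case)
    *p += ticks();                   // indirect read-modify-write with a non-constant source;
                                     // with the fix the emitter can fold the local's address
                                     // into a stack-relative form like `add [rsp+disp], reg`
    return total;
}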
Co-authored-by: Andy Ayers Co-authored-by: Carlos Sánchez López <1175054+carlossanlop@users.noreply.github.com> --- src/coreclr/jit/emitxarch.cpp | 7 ++++ .../JitBlue/Runtime_92218/Runtime_92218.cs | 40 +++++++++++++++++++ .../Runtime_92218/Runtime_92218.csproj | 8 ++++ 3 files changed, 55 insertions(+) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 980d40a47ac31..1c48d1c52f0bb 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5485,6 +5485,13 @@ void emitter::emitInsRMW(instruction ins, emitAttr attr, GenTreeStoreInd* storeI { assert(!src->isContained()); // there must be one non-contained src + if (addr->isContained() && addr->OperIs(GT_LCL_ADDR)) + { + GenTreeLclVarCommon* lclVar = addr->AsLclVarCommon(); + emitIns_S_R(ins, attr, src->GetRegNum(), lclVar->GetLclNum(), lclVar->GetLclOffs()); + return; + } + // ind, reg id = emitNewInstrAmd(attr, offset); emitHandleMemOp(storeInd, id, emitInsModeFormat(ins, IF_ARD_RRD), ins); diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs new file mode 100644 index 0000000000000..9b4696e31fc16 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.cs @@ -0,0 +1,40 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Threading; +using Xunit; + +public struct MutableStruct +{ + private long _internalValue; + + public long InternalValue + { + get => Volatile.Read(ref _internalValue); + private set => Volatile.Write(ref _internalValue, value); + } + + public void Add(long value) => AddInternal(value); + private void AddInternal(long value) => InternalValue += value; + public MutableStruct(long value) => InternalValue = value; +} + +public static class Runtime_92218 +{ + [Fact] + [MethodImpl(MethodImplOptions.AggressiveOptimization)] + public static void Problem() + { + var test = new MutableStruct(420); + var from = new MutableStruct(42); + + var wrapper = -new TimeSpan(3); + + while (test.InternalValue >= from.InternalValue) + { + test.Add(wrapper.Ticks); + } + } +} \ No newline at end of file diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj new file mode 100644 index 0000000000000..15edd99711a1a --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_92218/Runtime_92218.csproj @@ -0,0 +1,8 @@ + + + True + + + + + \ No newline at end of file From 09429dc1204365c3c7de70233db5e7c4f13ce3b9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 19 Sep 2023 23:12:45 -0700 Subject: [PATCH 4/9] Don't generate AddMask as it requires more explicit consideration of semantics (#92308) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Tanner Gooding Co-authored-by: Carlos Sánchez López <1175054+carlossanlop@users.noreply.github.com> --- src/coreclr/jit/morph.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 153f9b8bba8a8..3deada8eec085 100644 --- 
a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10807,8 +10807,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } #if defined(TARGET_XARCH) - case NI_AVX512F_Add: - case NI_AVX512BW_Add: case NI_AVX512F_And: case NI_AVX512DQ_And: case NI_AVX512F_AndNot: @@ -10850,13 +10848,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinsicId) { - case NI_AVX512F_Add: - case NI_AVX512BW_Add: - { - maskIntrinsicId = NI_AVX512F_AddMask; - break; - } - case NI_AVX512F_And: case NI_AVX512DQ_And: { From b0b595f388377c351ca980707424b2c66ed9ca2c Mon Sep 17 00:00:00 2001 From: Maoni Stephens Date: Wed, 20 Sep 2023 09:25:14 -0700 Subject: [PATCH 5/9] [release/8.0-rc2] porting DATAS change back to RC2 (#92323) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * new synchronization mechanism for DATAS (#90726) The current mechanism has a fundamental flaw: the idling threads can start running at unpredictable times after they are woken up. This causes all sorts of problems. For example, when a thread reaches `if (n_heaps <= heap_number)` in gc_thread_function, it's supposed to wait if the condition is true. But its execution could be delayed: after it reads n_heaps it can stop for a while, since no thread is waiting on this thread anyway... until some time later, when a heap count change happens again and requires this thread to participate. Now this thread does the comparison, discovers that it needs to wait, and goes idle, while all the other threads will just be waiting for this thread to join. Another example: it's not safe to change the heap count for a join from a larger one to a smaller one. It's fine to change from a smaller one to a larger one, because all the participating threads have to run in order for a join to finish. But if no one is waiting on a thread, it could wake up from the event being set by the last joining thread, not run for a while, and then go back to the respin loop at a point where the color has been changed and changed again! So now it thinks it can proceed with a join it does not belong to, and of course that wouldn't work. The way threads go idle and wake up is hard to keep track of - not only does it involve gc_start_event and gc_idle_thread_event, it also uses WaitForGCEvent, which is used by SuspendEE/RestartEE, which in turn means that whenever we want to call these we need to care about how that would affect this. The new mechanism only uses gc_start_event and gc_idle_thread_event, but I changed gc_idle_thread_event to a per-heap event. We can now easily track which threads are going idle: whenever a thread is about to wait on the idle event, we increase the current idle_thread_count. And when we increase the heap count, we only set the gc_idle_thread_event for the new heaps that are about to participate, so we can deduct that many from idle_thread_count. There's a much simpler code path from "we know we don't need these threads anymore" to "these threads are at a known point", because the next time gc_start_event is set (i.e., a GC is requested) we make sure to get these threads to a good known point, i.e., we wait till all of them have completed increasing idle_thread_count.
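To make that accounting concrete, here is a minimal, self-contained sketch of the protocol described above (illustrative only - the real code is in the gc.cpp diff below; idle_thread_count, n_max_heaps and the per-heap gc_idle_thread_event are the patch's names, while heap_in_service, go_idle, wait_for_known_point and wake_new_heaps are made up for this sketch):

#include <atomic>
#include <thread>

constexpr int n_max_heaps = 8;
std::atomic<int> idle_thread_count{0};
std::atomic<bool> heap_in_service[n_max_heaps] = {}; // stand-in for the per-heap gc_idle_thread_event

// A GC thread whose heap_number >= n_heaps publishes that it is parking
// *before* it waits, so heap0's thread can reason about where it is.
void go_idle(int heap_number)
{
    idle_thread_count.fetch_add(1);                  // "I am about to wait on the idle event"
    while (!heap_in_service[heap_number].load())
        std::this_thread::yield();                   // stand-in for gc_idle_thread_event.Wait()
    heap_in_service[heap_number].store(false);       // mimic auto-reset event semantics
}

// Heap0's thread, before the next suspension: wait until every thread that
// should be idle has actually reached the parking point above.
void wait_for_known_point(int n_heaps)
{
    while (idle_thread_count.load() != n_max_heaps - n_heaps)
        std::this_thread::yield();                   // the patch uses spin_and_wait here
}

// Growing the heap count: wake exactly the heaps entering service and
// deduct that many from the idle count up front.
void wake_new_heaps(int old_n_heaps, int new_n_heaps)
{
    idle_thread_count.fetch_sub(new_n_heaps - old_n_heaps);
    for (int i = old_n_heaps; i < new_n_heaps; i++)
        heap_in_service[i].store(true);              // stand-in for gc_idle_thread_event.Set()
}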
Also fixed a couple of other problems that I hit while testing the new mechanism - We are setting freeable_uoh_segment and freeable_soh_segment in decommission_heap to DECOMMISSIONED_REGION_P, which causes us to simply lose their values. We should make sure we push these to the free regions before we start changing the heap count. We should also call background_delay_delete_uoh_segments before we start changing the heap count, so we can get rid of the regions marked with heap_segment_flags_uoh_delete. If we allow these to be rearranged in equalize_promoted_bytes, the order can change, the invariant that the first region is never deleted no longer holds true, and we can AV in this method. I added a new method, delay_free_segments, to perform both tasks. The accounting of generation_free_list_space is slightly off for LOH, which causes us to hit the assert (gen_size >= dd_fragmentation (dd)); in change_heap_count, because we were not counting the loh_pad size. I also disabled the assert (free_list_space_decrease <= dd_fragmentation (dd)); for gen2, since I'm seeing it fire during stress runs. I have yet to investigate this, since I didn't want to add yet more changes to this PR. * fixed problems with how sampling is done and how we suspend to change heap count in DATAS (#91712) + Moved the sample recording to when we are suspended. Previously we calculated the throughput cost in check_heap_count (which is called right after we restart the EE on heap0), where we record the msl_wait_time (and reset it to 0 for soh/uoh). This is not synchronized with the allocating threads (which are already running at that point). So the allocating threads may have already accumulated more wait time, which gets attributed to this GC even though it's not within the period we are counting for this GC (and we lose this part for the next GC). For BGC this is incorrect: if an ephemeral GC did happen before the BGC starts, we'd be adding a sample for that GC, which is basically correct for that eph GC. But if an eph GC did not happen, we are just adding a random sample that calculates the tcp as (msl wait + whatever GC finished before this BGC), which is obviously incorrect. + Added gen2 sampling - this was adapted from Peter's gen2 sampling changes. It serves as a backstop in case the existing sampling doesn't ever pick up gen2 GC costs. I made the following fixes - 1) changed the way we calculate the median, 2) moved where this is calculated to again avoid timing issues, 3) made the gen2 samples actually count instead of losing that info if we happen to sample when a gen2 didn't just occur. + Changed when check_heap_count is called - the previous place was right after a suspension, which does not help with spacing the suspensions out (it was "suspend for GC" then "immediately suspend to change heap count"). It also caused a problem with BGC: we always tried to change the heap count when we couldn't, because a BGC was in progress. I changed this to happen on a timeout, to intentionally space the suspensions out. Now, most of the time, heap count changes happen due to this timeout. If we are really in a situation where GCs are happening too quickly and we return from waiting on the ee_suspend_event because a GC started, we change the heap count right before we do that GC. So this also helps with the BGC problem. * gen0_bricks_cleared flag needs to be propagated when we change heap count (#90457) when we change the heap count, heap X may get a region from heap Y where the gen0_bricks_cleared flag from Y says false but heap X says true. So when we check the bricks on heap X, we assume they are cleared when they are not.
the fix is to detect if any heap has this flag as false and if so make all heaps’ flag false (tracking which region is moved from which other heap is something we need additional recording for and it’s not really worth doing just for this) * a logging change --------- Co-authored-by: Maoni0 --- src/coreclr/gc/gc.cpp | 1001 +++++++++++++++++++++++++------------ src/coreclr/gc/gcconfig.h | 1 + src/coreclr/gc/gcpriv.h | 93 +++- 3 files changed, 746 insertions(+), 349 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index daeadfe9821b8..7351954070725 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -823,6 +823,11 @@ class t_join join_struct.r_join_lock = n_th; } + int get_num_threads() + { + return join_struct.n_threads; + } + void destroy () { dprintf (JOIN_LOG, ("Destroying join structure")); @@ -887,6 +892,8 @@ class t_join // avoid race due to the thread about to reset the event (occasionally) being preempted before ResetEvent() if (color == join_struct.lock_color.LoadWithoutBarrier()) { + dprintf (9999, ("---h%d %d j%d %d - respin!!! (c:%d-%d)", + gch->heap_number, join_id, join_struct.n_threads, color, join_struct.lock_color.LoadWithoutBarrier())); goto respin; } @@ -1117,6 +1124,25 @@ t_join bgc_t_join; } \ } +#define spin_and_wait(count_to_spin, expr) \ +{ \ + while (!expr) \ + { \ + for (int j = 0; j < count_to_spin; j++) \ + { \ + if (expr) \ + { \ + break; \ + } \ + YieldProcessor (); \ + } \ + if (!(expr)) \ + { \ + GCToOSInterface::YieldThread (0); \ + } \ + } \ +} + #ifdef BACKGROUND_GC #define max_pending_allocs 64 @@ -1429,8 +1455,6 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) { #ifdef DYNAMIC_HEAP_COUNT uint64_t start = GetHighPrecisionTimeStamp(); - - msl->msl_wait_count++; #endif //DYNAMIC_HEAP_COUNT unsigned int i = 0; @@ -1485,7 +1509,7 @@ enter_msl_status gc_heap::enter_spin_lock_msl_helper (GCSpinLock* msl) #ifdef DYNAMIC_HEAP_COUNT uint64_t end = GetHighPrecisionTimeStamp(); Interlocked::ExchangeAdd64 (&msl->msl_wait_time, end - start); - dprintf (6666, ("wait for msl lock total time: %zd, total count: %zd, this time: %zd, this count: %u", msl->msl_wait_time, msl->msl_wait_count, end - start, i)); + dprintf (3, ("h%d wait for msl lock wait time %zd, total wait time: %zd", heap_number, (end - start), msl->msl_wait_time)); #endif //DYNAMIC_HEAP_COUNT } while (Interlocked::CompareExchange (&msl->lock, lock_taken, lock_free) != lock_free); @@ -2318,9 +2342,6 @@ sorted_table* gc_heap::seg_table; #ifdef MULTIPLE_HEAPS GCEvent gc_heap::ee_suspend_event; -#ifdef DYNAMIC_HEAP_COUNT -GCEvent gc_heap::gc_idle_thread_event; -#endif //DYNAMIC_HEAP_COUNT size_t gc_heap::min_gen0_balance_delta = 0; size_t gc_heap::min_balance_threshold = 0; #endif //MULTIPLE_HEAPS @@ -2919,6 +2940,12 @@ BOOL gc_heap::should_expand_in_full_gc = FALSE; #ifdef DYNAMIC_HEAP_COUNT int gc_heap::dynamic_adaptation_mode = dynamic_adaptation_default; gc_heap::dynamic_heap_count_data_t SVR::gc_heap::dynamic_heap_count_data; +uint64_t gc_heap::last_suspended_end_time = 0; +size_t gc_heap::gc_index_full_gc_end = 0; + +#ifdef STRESS_DYNAMIC_HEAP_COUNT +int gc_heap::heaps_in_this_gc = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT #endif // DYNAMIC_HEAP_COUNT // Provisional mode related stuff. 
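The throughput cost percent ("tcp") that the sampling fixes above refer to is computed per sample in calculate_new_heap_count, which appears later in this patch. A stand-alone sketch of that arithmetic (sample_t mirrors the patch's dynamic_heap_count_data_t::sample; the helper name and the numbers in main are made up):

#include <cstdint>
#include <cstdio>

// Mirrors dynamic_heap_count_data_t::sample from the patch.
struct sample_t
{
    uint64_t elapsed_between_gcs; // this GC's end minus the previous suspension end
    uint64_t gc_pause_time;       // pause time of this GC
    uint64_t msl_wait_time;       // more-space-lock wait accumulated across heaps
};

// Per-sample throughput cost: the msl wait is averaged over the heaps, added
// to the pause, and expressed as a percentage of the elapsed time, capped at
// 100 as in the patch.
float throughput_cost_percent(const sample_t& s, int n_heaps)
{
    if (s.elapsed_between_gcs == 0) return 0.0f;
    float tcp = ((float)s.msl_wait_time / n_heaps + s.gc_pause_time)
                * 100.0f / (float)s.elapsed_between_gcs;
    return (tcp > 100.0f) ? 100.0f : tcp;
}

int main()
{
    sample_t s = { 10000, 500, 1200 };  // made-up numbers
    // (1200 / 4 + 500) * 100 / 10000 = 8%
    printf("tcp = %.3f%%\n", throughput_cost_percent(s, 4));
}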
@@ -6967,12 +6994,6 @@ BOOL gc_heap::create_thread_support (int number_of_heaps) { goto cleanup; } -#ifdef DYNAMIC_HEAP_COUNT - if (!gc_idle_thread_event.CreateOSManualEventNoThrow (FALSE)) - { - goto cleanup; - } -#endif //DYNAMIC_HEAP_COUNT if (!ee_suspend_event.CreateOSAutoEventNoThrow (FALSE)) { goto cleanup; @@ -7020,10 +7041,6 @@ bool gc_heap::create_gc_thread () return GCToEEInterface::CreateThread(gc_thread_stub, this, false, ".NET Server GC"); } -#ifdef DYNAMIC_HEAP_COUNT -static size_t prev_change_heap_count_gc_index; -#endif //DYNAMIC_HEAP_COUNT - #ifdef _MSC_VER #pragma warning(disable:4715) //IA64 xcompiler recognizes that without the 'break;' the while(1) will never end and therefore not return a value for that code path #endif //_MSC_VER @@ -7042,18 +7059,87 @@ void gc_heap::gc_thread_function () if (heap_number == 0) { - uint32_t wait_result = gc_heap::ee_suspend_event.Wait(gradual_decommit_in_progress_p ? DECOMMIT_TIME_STEP_MILLISECONDS : INFINITE, FALSE); + bool wait_on_time_out_p = gradual_decommit_in_progress_p; + uint32_t wait_time = DECOMMIT_TIME_STEP_MILLISECONDS; +#ifdef DYNAMIC_HEAP_COUNT + // background_running_p can only change from false to true during suspension. + if (!gc_heap::background_running_p () && dynamic_heap_count_data.should_change_heap_count) + { + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + wait_time = min (wait_time, (uint32_t)(sample.elapsed_between_gcs / 1000 / 3)); + wait_time = max (wait_time, 1); + + dprintf (6666, ("gc#0 thread waiting for %d ms (betwen GCs %I64d)", wait_time, sample.elapsed_between_gcs)); + } +#endif //DYNAMIC_HEAP_COUNT + uint32_t wait_result = gc_heap::ee_suspend_event.Wait(wait_on_time_out_p ? wait_time : INFINITE, FALSE); + dprintf (9999, ("waiting for ee done res %d (timeout %d, %I64d ms since last suspend end)(should_change_heap_count is %d) (gradual_decommit_in_progress_p %d)", + wait_result, wait_time, ((GetHighPrecisionTimeStamp() - last_suspended_end_time) / 1000), + dynamic_heap_count_data.should_change_heap_count, gradual_decommit_in_progress_p)); if (wait_result == WAIT_TIMEOUT) { - decommit_lock.Enter(); - gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); - decommit_lock.Leave(); +#ifdef DYNAMIC_HEAP_COUNT + if (dynamic_heap_count_data.should_change_heap_count) + { +#ifdef BACKGROUND_GC + if (!gc_heap::background_running_p ()) +#endif //BACKGROUND_GC + { + dprintf (6666, ("changing heap count due to timeout")); + check_heap_count(); + } + } +#endif //DYNAMIC_HEAP_COUNT + + if (gradual_decommit_in_progress_p) + { + decommit_lock.Enter (); + gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); + decommit_lock.Leave (); + } continue; } +#ifdef DYNAMIC_HEAP_COUNT + // We might want to consider also doing this when a BGC finishes. + if (dynamic_heap_count_data.should_change_heap_count) + { +#ifdef BACKGROUND_GC + if (!gc_heap::background_running_p ()) +#endif //BACKGROUND_GC + { + // this was a request to do a GC so make sure we follow through with one. + dprintf (6666, ("changing heap count at a GC start")); + check_heap_count (); + } + } + + // wait till the threads that should have gone idle at least reached the place where they are about to wait on the idle event. 
+ if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && + (n_heaps != dynamic_heap_count_data.last_n_heaps)) + { + int spin_count = 1024; + int idle_thread_count = n_max_heaps - n_heaps; + dprintf (9999, ("heap count changed %d->%d, idle should be %d and is %d", dynamic_heap_count_data.last_n_heaps, n_heaps, + idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + if (idle_thread_count != dynamic_heap_count_data.idle_thread_count) + { + spin_and_wait (spin_count, (idle_thread_count == dynamic_heap_count_data.idle_thread_count)); + dprintf (9999, ("heap count changed %d->%d, now idle is %d", dynamic_heap_count_data.last_n_heaps, n_heaps, + VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + } + + dynamic_heap_count_data.last_n_heaps = n_heaps; + } +#endif //DYNAMIC_HEAP_COUNT + suspended_start_time = GetHighPrecisionTimeStamp(); BEGIN_TIMING(suspend_ee_during_log); + dprintf (9999, ("h0 suspending EE in GC!")); GCToEEInterface::SuspendEE(SUSPEND_FOR_GC); + dprintf (9999, ("h0 suspended EE in GC!")); END_TIMING(suspend_ee_during_log); proceed_with_gc_p = TRUE; @@ -7067,46 +7153,74 @@ void gc_heap::gc_thread_function () { settings.init_mechanisms(); #ifdef DYNAMIC_HEAP_COUNT - // make sure the other gc threads cannot see this as a request to change heap count - // see explanation below about the cases when we return from gc_start_event.Wait - assert (dynamic_heap_count_data.new_n_heaps == n_heaps); + if (gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) + { + // make sure the other gc threads cannot see this as a request to change heap count + // see explanation below about the cases when we return from gc_start_event.Wait + assert (dynamic_heap_count_data.new_n_heaps == n_heaps); + } #endif //DYNAMIC_HEAP_COUNT + dprintf (9999, ("GC thread %d setting_gc_start_in_gc(h%d)", heap_number, n_heaps)); gc_start_event.Set(); } dprintf (3, (ThreadStressLog::gcServerThread0StartMsg(), heap_number)); } else { + dprintf (9999, ("GC thread %d waiting_for_gc_start(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier(&settings.gc_index))); gc_start_event.Wait(INFINITE, FALSE); #ifdef DYNAMIC_HEAP_COUNT - // we have a couple different cases to handle here when we come back from the wait: - // 1. We are starting a GC. Signaled by dynamic_heap_count_data.new_n_heaps == n_heaps - // a) We are starting a GC, but this thread is idle. Signaled by n_heaps <= heap_number - // b) We are starting a GC, and this thread is participating. Signaled by heap_number < n_heaps - // 2. We are changing heap count. Signaled by dynamic_heap_count_data.new_n_heaps != n_heaps - // a) We are changing heap count, but this thread is idle. Signaled by n_heaps <= heap_number. - // b) We are changing heap count, and this thread is participating. Signaled by heap_number < n_heaps. 
- - // check for 1.a) and 2.a) cases above - if (n_heaps <= heap_number) - { - dprintf (2, ("GC thread %d idle", heap_number)); - - // make sure GC is complete so we know the gc_idle_thread_event has been reset - g_theGCHeap->WaitUntilGCComplete(); + dprintf (9999, ("GC thread %d waiting_done_gc_start(%d-%d)(i: %d)(gc%Id)", + heap_number, n_heaps, dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.init_only_p, VolatileLoadWithoutBarrier (&settings.gc_index))); + + if ((gc_heap::dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && + (dynamic_heap_count_data.new_n_heaps != n_heaps)) + { + // The reason why we need to do this is - + // + for threads that were participating, we need them to do work for change_heap_count + // + for threads that were not participating but will need to participate, we need to make sure they are woken now instead of + // randomly sometime later. + int old_n_heaps = n_heaps; + int new_n_heaps = dynamic_heap_count_data.new_n_heaps; + int num_threads_to_wake = max (new_n_heaps, old_n_heaps); + if (heap_number < num_threads_to_wake) + { + dprintf (9999, ("h%d < %d, calling change", heap_number, num_threads_to_wake)); + change_heap_count (dynamic_heap_count_data.new_n_heaps); + if (new_n_heaps < old_n_heaps) + { + dprintf (9999, ("h%d after change", heap_number)); + // at the end of change_heap_count we've changed join's heap count to the new one if it's smaller. So we need to make sure + // only that many threads will participate in the following GCs. + if (heap_number < new_n_heaps) + { + dprintf (9999, ("h%d < %d participating (dec)", heap_number, new_n_heaps)); + } + else + { + Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count); + dprintf (9999, ("GC thread %d wait_on_idle(%d < %d)(gc%Id), total idle %d", heap_number, old_n_heaps, new_n_heaps, + VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + gc_idle_thread_event.Wait (INFINITE, FALSE); + dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id) after doing change", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + } + } + else + { + dprintf (9999, ("h%d < %d participating (inc)", heap_number, new_n_heaps)); + } + } + else + { + Interlocked::Increment (&dynamic_heap_count_data.idle_thread_count); + dprintf (9999, ("GC thread %d wait_on_idle(< max %d)(gc%Id), total idle %d", heap_number, num_threads_to_wake, + VolatileLoadWithoutBarrier (&settings.gc_index), VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + gc_idle_thread_event.Wait (INFINITE, FALSE); + dprintf (9999, ("GC thread %d waking_from_idle(%d)(gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + } - // now wait on the gc_idle_thread_event - gc_idle_thread_event.Wait(INFINITE, FALSE); - dprintf (2, ("GC thread %d waking from idle", heap_number)); - continue; - } - // case 2.b) above: is this a request to change heap count? - if (dynamic_heap_count_data.new_n_heaps != n_heaps) - { - change_heap_count (dynamic_heap_count_data.new_n_heaps); continue; } - // case 1.b) above: we're starting a GC. 
#endif //DYNAMIC_HEAP_COUNT dprintf (3, (ThreadStressLog::gcServerThreadNStartMsg(), heap_number)); } @@ -7191,10 +7305,6 @@ void gc_heap::gc_thread_function () { gradual_decommit_in_progress_p = decommit_step (DECOMMIT_TIME_STEP_MILLISECONDS); } -#ifdef DYNAMIC_HEAP_COUNT - // check if we should adjust the number of heaps - check_heap_count(); -#endif //DYNAMIC_HEAP_COUNT } else { @@ -12527,6 +12637,16 @@ void gc_heap::rearrange_uoh_segments() freeable_uoh_segment = 0; } +void gc_heap::delay_free_segments() +{ + rearrange_uoh_segments(); +#ifdef BACKGROUND_GC + background_delay_delete_uoh_segments(); + if (!gc_heap::background_running_p()) + rearrange_small_heap_segments(); +#endif //BACKGROUND_GC +} + #ifndef USE_REGIONS void gc_heap::rearrange_heap_segments(BOOL compacting) { @@ -14860,6 +14980,25 @@ gc_heap::init_gc_heap (int h_number) gc_done_event_lock = -1; gc_done_event_set = false; +#ifdef DYNAMIC_HEAP_COUNT + if (h_number != 0) + { + if (!gc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) + { + return 0; + } + +#ifdef BACKGROUND_GC + if (!bgc_idle_thread_event.CreateAutoEventNoThrow (FALSE)) + { + return 0; + } +#endif //BACKGROUND_GC + + dprintf (9999, ("creating idle events for h%d", h_number)); + } +#endif //DYNAMIC_HEAP_COUNT + if (!init_dynamic_data()) { return 0; @@ -16038,7 +16177,6 @@ void min_fl_list_info::thread_item_no_prev (uint8_t* item) tail = item; } -// This is only implemented for gen2 right now!!!! // the min_fl_list array is arranged as chunks of n_heaps min_fl_list_info, the 1st chunk corresponds to the 1st bucket, // and so on. void allocator::rethread_items (size_t* num_total_fl_items, size_t* num_total_fl_items_rethreaded, gc_heap* current_heap, @@ -17406,6 +17544,7 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, gen_number, align_const); dd_new_allocation (dynamic_data_of (gen_number)) -= limit; + size_t saved_free_list_size = free_list_size; #ifdef FEATURE_LOH_COMPACTION if (loh_pad) { @@ -17434,7 +17573,7 @@ BOOL gc_heap::a_fit_free_list_uoh_p (size_t size, { generation_free_obj_space (gen) += remain_size; } - generation_free_list_space (gen) -= free_list_size; + generation_free_list_space (gen) -= saved_free_list_size; assert ((ptrdiff_t)generation_free_list_space (gen) >= 0); generation_free_list_allocated (gen) += limit; @@ -22000,11 +22139,70 @@ BOOL gc_heap::should_proceed_with_gc() void gc_heap::update_end_gc_time_per_heap() { +#ifdef DYNAMIC_HEAP_COUNT + size_t prev_gen2_end_time = 0; + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes) && (settings.condemned_generation == max_generation)) + { + dynamic_data* dd = dynamic_data_of (max_generation); + prev_gen2_end_time = dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd);; + } +#endif //DYNAMIC_HEAP_COUNT + for (int gen_number = 0; gen_number <= settings.condemned_generation; gen_number++) { dynamic_data* dd = dynamic_data_of (gen_number); + + if (heap_number == 0) + { + dprintf (6666, ("prev gen%d GC end time: prev start %I64d + prev gc elapsed %Id = %I64d", + gen_number, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)))); + } + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); + + if (heap_number == 0) + { + dprintf (6666, ("updated NGC%d %Id elapsed time to %I64d - %I64d = %I64d", gen_number, dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd))); + } + } + +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == 
dynamic_adaptation_to_application_sizes)) + { + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + sample.elapsed_between_gcs = end_gc_time - last_suspended_end_time; + sample.gc_pause_time = dd_gc_elapsed_time (dynamic_data_of (0)); + sample.msl_wait_time = get_msl_wait_time(); + + dprintf (6666, ("sample#%d: this GC end %I64d - last sus end %I64d = %I64d, this GC pause %I64d, msl wait %I64d", + dynamic_heap_count_data.sample_index, end_gc_time, last_suspended_end_time, sample.elapsed_between_gcs, sample.gc_pause_time, sample.msl_wait_time)); + + last_suspended_end_time = end_gc_time; + + GCEventFireHeapCountSample_V1 ( + (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index), + sample.elapsed_between_gcs, + sample.gc_pause_time, + sample.msl_wait_time); + + dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size; + + if (settings.condemned_generation == max_generation) + { + gc_index_full_gc_end = dd_gc_clock (dynamic_data_of (0)); + size_t elapsed_between_gen2_gcs = end_gc_time - prev_gen2_end_time; + size_t gen2_elapsed_time = sample.gc_pause_time; + dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = (float)gen2_elapsed_time * 100.0f / elapsed_between_gen2_gcs; + + dprintf (6666, ("gen2 sample#%d: this GC end %I64d - last gen2 end %I64d = %I64d, GC elapsed %I64d, percent %.3f", + dynamic_heap_count_data.gen2_sample_index, end_gc_time, prev_gen2_end_time, elapsed_between_gen2_gcs, + gen2_elapsed_time, dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index])); + dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size; + } + + calculate_new_heap_count (); } +#endif //DYNAMIC_HEAP_COUNT } void gc_heap::update_end_ngc_time() @@ -22151,7 +22349,31 @@ void gc_heap::gc1() { dynamic_data* dd = dynamic_data_of (n); end_gc_time = GetHighPrecisionTimeStamp(); + size_t time_since_last_gen2 = 0; + +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)) + { + time_since_last_gen2 = (size_t)(end_gc_time - (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd))); + dprintf (6666, ("BGC %Id end %I64d - (prev gen2 start %I64d + elapsed %Id = %I64d) = time inbewteen gen2 %Id", + dd_gc_clock (dd), end_gc_time, dd_previous_time_clock (dd), dd_gc_elapsed_time (dd), (dd_previous_time_clock (dd) + dd_gc_elapsed_time (dd)), time_since_last_gen2)); + } +#endif //DYNAMIC_HEAP_COUNT + dd_gc_elapsed_time (dd) = (size_t)(end_gc_time - dd_time_clock (dd)); +#ifdef DYNAMIC_HEAP_COUNT + if ((heap_number == 0) && (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes)) + { + dprintf (6666, ("updating BGC %Id elapsed time to %I64d - %I64d = %I64d", dd_gc_clock (dd), end_gc_time, dd_time_clock (dd), dd_gc_elapsed_time (dd))); + + float bgc_percent = (float)dd_gc_elapsed_time (dd) * 100.0f / (float)time_since_last_gen2; + dynamic_heap_count_data.gen2_gc_percents[dynamic_heap_count_data.gen2_sample_index] = bgc_percent; + dprintf (6666, ("gen2 sample %d elapsed %Id * 100 / time inbetween gen2 %Id = %.3f", + dynamic_heap_count_data.gen2_sample_index, dd_gc_elapsed_time (dd), time_since_last_gen2, bgc_percent)); + dynamic_heap_count_data.gen2_sample_index = (dynamic_heap_count_data.gen2_sample_index + 1) % dynamic_heap_count_data_t::sample_size; + gc_index_full_gc_end = 
dd_gc_clock (dynamic_data_of (0)); + } +#endif //DYNAMIC_HEAP_COUNT #ifdef HEAP_BALANCE_INSTRUMENTATION if (heap_number == 0) @@ -22758,7 +22980,12 @@ void gc_heap::merge_fl_from_other_heaps (int gen_idx, int to_n_heaps, int from_n assert (free_list_space_decrease <= generation_free_list_space (gen)); generation_free_list_space (gen) -= free_list_space_decrease; - assert (free_list_space_decrease <= dd_fragmentation (dd)); + // TODO - I'm seeing for gen2 this is free_list_space_decrease can be a bit larger than frag. + // Need to fix this later. + if (gen_idx != max_generation) + { + assert (free_list_space_decrease <= dd_fragmentation (dd)); + } size_t free_list_space_increase = 0; for (int from_hn = 0; from_hn < from_n_heaps; from_hn++) @@ -23733,9 +23960,6 @@ void gc_heap::garbage_collect (int n) #ifdef MULTIPLE_HEAPS gc_start_event.Reset(); -#ifdef DYNAMIC_HEAP_COUNT - gc_idle_thread_event.Reset(); -#endif //DYNAMIC_HEAP_COUNT gc_t_join.restart(); #endif //MULTIPLE_HEAPS } @@ -23757,6 +23981,9 @@ void gc_heap::garbage_collect (int n) #endif // STRESS_HEAP #ifdef MULTIPLE_HEAPS +#ifdef STRESS_DYNAMIC_HEAP_COUNT + Interlocked::Increment (&heaps_in_this_gc); +#endif //STRESS_DYNAMIC_HEAP_COUNT //align all heaps on the max generation to condemn dprintf (3, ("Joining for max generation to condemn")); condemned_generation_num = generation_to_condemn (n, @@ -23772,30 +23999,31 @@ void gc_heap::garbage_collect (int n) #endif //FEATURE_BASICFREEZE #ifdef MULTIPLE_HEAPS +#ifdef STRESS_DYNAMIC_HEAP_COUNT + dprintf (9999, ("%d heaps, join sees %d, actually joined %d, %d idle threads (%d)", + n_heaps, gc_t_join.get_num_threads (), heaps_in_this_gc, + VolatileLoadWithoutBarrier(&dynamic_heap_count_data.idle_thread_count), (n_max_heaps - n_heaps))); + if (heaps_in_this_gc != n_heaps) + { + dprintf (9999, ("should have %d heaps but actually have %d!!", n_heaps, heaps_in_this_gc)); + GCToOSInterface::DebugBreak (); + } + + heaps_in_this_gc = 0; +#endif //STRESS_DYNAMIC_HEAP_COUNT + for (int i = 0; i < n_heaps; i++) { gc_heap* hp = g_heaps[i]; // check for card table growth if (g_gc_card_table != hp->card_table) hp->copy_brick_card_table(); - - hp->rearrange_uoh_segments(); -#ifdef BACKGROUND_GC - hp->background_delay_delete_uoh_segments(); - if (!gc_heap::background_running_p()) - hp->rearrange_small_heap_segments(); -#endif //BACKGROUND_GC + hp->delay_free_segments(); } #else //MULTIPLE_HEAPS if (g_gc_card_table != card_table) copy_brick_card_table(); - - rearrange_uoh_segments(); -#ifdef BACKGROUND_GC - background_delay_delete_uoh_segments(); - if (!gc_heap::background_running_p()) - rearrange_small_heap_segments(); -#endif //BACKGROUND_GC + delay_free_segments(); #endif //MULTIPLE_HEAPS BOOL should_evaluate_elevation = TRUE; @@ -23882,10 +24110,8 @@ void gc_heap::garbage_collect (int n) do_pre_gc(); #ifdef MULTIPLE_HEAPS + dprintf (9999, ("in GC, resetting gc_start")); gc_start_event.Reset(); -#ifdef DYNAMIC_HEAP_COUNT - gc_idle_thread_event.Reset(); -#endif //DYNAMIC_HEAP_COUNT dprintf(3, ("Starting all gc threads for gc")); gc_t_join.restart(); #endif //MULTIPLE_HEAPS @@ -24341,7 +24567,7 @@ void gc_heap::equalize_promoted_bytes(int condemned_gen_number) // hope is to achieve better work balancing in relocate and compact phases // this is also used when the heap count changes to balance regions between heaps int highest_gen_number = ((condemned_gen_number == max_generation) ? 
- (total_generation_count - 1) : condemned_gen_number); + (total_generation_count - 1) : condemned_gen_number); int stop_gen_idx = get_stop_generation_index (condemned_gen_number); for (int gen_idx = highest_gen_number; gen_idx >= stop_gen_idx; gen_idx--) @@ -25050,285 +25276,332 @@ void gc_heap::recommission_heap() #endif //RECORD_LOH_STATE } -void gc_heap::check_heap_count () +float median_of_3 (float a, float b, float c) { - dynamic_heap_count_data.new_n_heaps = n_heaps; +#define compare_and_swap(i, j) \ + { \ + if (i < j) \ + { \ + float t = i; \ + i = j; \ + j = t; \ + } \ + } + compare_and_swap (b, a); + compare_and_swap (c, a); + compare_and_swap (c, b); +#undef compare_and_swap + return b; +} - if (dynamic_adaptation_mode != dynamic_adaptation_to_application_sizes) +size_t gc_heap::get_num_completed_gcs () +{ + size_t num_completed_gcs = settings.gc_index; +#ifdef BACKGROUND_GC + if (g_heaps[0]->is_bgc_in_progress ()) { - return; + num_completed_gcs--; + dprintf (6666, ("BGC in prog, completed GCs -> %Id", num_completed_gcs)); } +#endif //BACKGROUND_GC - // we should be calling this only on the main GC thread - assert (heap_number == 0); + return num_completed_gcs; +} - // acquire data for the current sample - uint64_t soh_msl_wait_time = 0; - uint64_t uoh_msl_wait_time = 0; - size_t allocating_thread_count = 0; - size_t heap_size = 0; - for (int i = 0; i < n_heaps; i++) +int gc_heap::calculate_new_heap_count () +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + size_t num_completed_gcs = get_num_completed_gcs (); + + dprintf (6666, ("current GC %Id(completed: %Id), prev completed GCs %Id, last full GC happened at index %Id", + VolatileLoadWithoutBarrier (&settings.gc_index), num_completed_gcs, dynamic_heap_count_data.prev_num_completed_gcs, gc_index_full_gc_end)); + + if (num_completed_gcs < (dynamic_heap_count_data.prev_num_completed_gcs + dynamic_heap_count_data_t::sample_size)) { - gc_heap* hp = g_heaps[i]; + dprintf (6666, ("not enough GCs, skipping")); + return n_heaps; + } - allocating_thread_count += hp->alloc_contexts_used; + float median_gen2_tcp_percent = 0.0f; + if (gc_index_full_gc_end >= (settings.gc_index - dynamic_heap_count_data_t::sample_size)) + { + median_gen2_tcp_percent = dynamic_heap_count_data.get_median_gen2_gc_percent (); + } - soh_msl_wait_time += hp->more_space_lock_soh.msl_wait_time; - hp->more_space_lock_soh.msl_wait_time = 0; - hp->more_space_lock_soh.msl_wait_count = 0; + // If there was a blocking gen2 GC, the overhead would be very large and most likely we would not pick it. So we + // rely on the gen2 sample's overhead calculated above. + float throughput_cost_percents[dynamic_heap_count_data_t::sample_size]; + for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) + { + dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; + throughput_cost_percents[i] = (sample.elapsed_between_gcs ? 
(((float)sample.msl_wait_time / n_heaps + sample.gc_pause_time) * 100.0f / (float)sample.elapsed_between_gcs) : 0.0f); + assert (throughput_cost_percents[i] >= 0.0); + if (throughput_cost_percents[i] > 100.0) + throughput_cost_percents[i] = 100.0; + dprintf (6666, ("sample %d: msl %I64d / %d + pause %I64d / elapsed %I64d = throughput_cost_percent: %.3f", i, + sample.msl_wait_time, n_heaps, sample.gc_pause_time, sample.elapsed_between_gcs, throughput_cost_percents[i])); + } - uoh_msl_wait_time += hp->more_space_lock_uoh.msl_wait_time; - hp->more_space_lock_uoh.msl_wait_time = 0; - hp->more_space_lock_uoh.msl_wait_count = 0; + float median_throughput_cost_percent = median_of_3 (throughput_cost_percents[0], throughput_cost_percents[1], throughput_cost_percents[2]); + + // apply exponential smoothing and use 1/3 for the smoothing factor + const float smoothing = 3; + float smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent; + if (smoothed_median_throughput_cost_percent != 0.0f) + { + // average it with the previous value + smoothed_median_throughput_cost_percent = median_throughput_cost_percent / smoothing + (smoothed_median_throughput_cost_percent / smoothing) * (smoothing - 1); + } + else + { + smoothed_median_throughput_cost_percent = median_throughput_cost_percent; + } + + dprintf (6666, ("median tcp: %.3f, smoothed tcp: %.3f, gen2 tcp %.3f(%.3f, %.3f, %.3f)", + median_throughput_cost_percent, smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + dynamic_heap_count_data.gen2_gc_percents[0], dynamic_heap_count_data.gen2_gc_percents[1], dynamic_heap_count_data.gen2_gc_percents[2])); + + size_t heap_size = 0; + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { dynamic_data* dd = hp->dynamic_data_of (gen_idx); // estimate the size of each generation as the live data size plus the budget - heap_size += dd_promoted_size (dd) + dd_desired_allocation (dd); - dprintf (6666, ("h%d g%d promoted: %zd desired allocation: %zd", i, gen_idx, dd_promoted_size (dd), dd_desired_allocation (dd))); + heap_size += dd_current_size (dd) + dd_desired_allocation (dd); + dprintf (3, ("h%d g%d current: %zd desired allocation: %zd", i, gen_idx, dd_promoted_size (dd), dd_desired_allocation (dd))); } } - dynamic_data* hp0_dd0 = g_heaps[0]->dynamic_data_of (0); + // estimate the space cost of adding a heap as the min gen0 budget + size_t heap_space_cost_per_heap = dd_min_size (g_heaps[0]->dynamic_data_of (0)); - // persist data for the current sample - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[dynamic_heap_count_data.sample_index]; + // compute the % space cost of adding a heap + float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - sample.soh_msl_wait_time = soh_msl_wait_time / n_heaps; - sample.uoh_msl_wait_time = uoh_msl_wait_time / n_heaps; - sample.elapsed_between_gcs = dd_time_clock (hp0_dd0) - dd_previous_time_clock (hp0_dd0); - sample.gc_elapsed_time = dd_gc_elapsed_time (hp0_dd0); - sample.allocating_thread_count = allocating_thread_count; - sample.heap_size = heap_size; + // compute reasonable step sizes for the heap count + // + // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
+ // we don't go all the way to the number of CPUs, but stay 1 or 2 short + int step_up = (n_heaps + 1) / 2; + int extra_heaps = 1 + (n_max_heaps >= 32); + step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps); - dprintf (6666, ("sample %d: soh_msl_wait_time: %zd, uoh_msl_wait_time: %zd, elapsed_between_gcs: %zd, gc_elapsed_time: %d, heap_size: %zd MB", - dynamic_heap_count_data.sample_index, - sample.soh_msl_wait_time, - sample.uoh_msl_wait_time, - sample.elapsed_between_gcs, - sample.gc_elapsed_time, - sample.heap_size/(1024*1024))); + // on the way down, we essentially divide the heap count by 1.5 + int step_down = (n_heaps + 1) / 3; - dynamic_heap_count_data.sample_index = (dynamic_heap_count_data.sample_index + 1) % dynamic_heap_count_data_t::sample_size; + // estimate the potential time benefit of going up a step + float tcp_reduction_per_step_up = smoothed_median_throughput_cost_percent * step_up / (n_heaps + step_up); - GCEventFireHeapCountSample_V1( - sample.gc_elapsed_time, - sample.soh_msl_wait_time, - sample.uoh_msl_wait_time, - sample.elapsed_between_gcs - ); + // estimate the potential time cost of going down a step + float tcp_increase_per_step_down = smoothed_median_throughput_cost_percent * step_down / (n_heaps - step_down); + + // estimate the potential space cost of going up a step + float scp_increase_per_step_up = percent_heap_space_cost_per_heap * step_up; - if (settings.gc_index < prev_change_heap_count_gc_index + 3) + // estimate the potential space saving of going down a step + float scp_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down; + + dprintf (6666, ("[CHP] u %d, d %d | space cost %Id / heap %Id(%.2fmb) = scp %.3f (u: %.3f, d: %.3f) | stcp %.3f, u * %.1f = %.3f, d * %.1f = %.3f", + step_up, step_down, + heap_space_cost_per_heap, heap_size, ((float)heap_size / (float)1000 / (float)1000), percent_heap_space_cost_per_heap, + scp_increase_per_step_up, scp_decrease_per_step_down, + smoothed_median_throughput_cost_percent, + ((float)step_up / (float)(n_heaps + step_up)), tcp_reduction_per_step_up, + ((float)step_down / (float)(n_heaps - step_down)), tcp_increase_per_step_down)); + +#ifdef STRESS_DYNAMIC_HEAP_COUNT + // quick hack for initial testing + int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1; + + // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap + if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1)) { - // reconsider the decision every few gcs - return; + new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps); + new_n_heaps = max (new_n_heaps, 1); } - - if (gc_heap::background_running_p()) + dprintf (6666, ("stress %d -> %d", n_heaps, new_n_heaps)); +#else //STRESS_DYNAMIC_HEAP_COUNT + int new_n_heaps = n_heaps; + if (median_throughput_cost_percent > 10.0f) { - // can't have background gc running while we change the number of heaps - // so it's useless to compute a new number of heaps here + // ramp up more agressively - use as many heaps as it would take to bring + // the tcp down to 5% + new_n_heaps = (int)(n_heaps * (median_throughput_cost_percent / 5.0)); + dprintf (6666, ("[CHP0] tcp %.3f -> %d * %.3f = %d", median_throughput_cost_percent, n_heaps, (median_throughput_cost_percent / 5.0), new_n_heaps)); + new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps); } - else + // if the median tcp is 10% or less, react slower + else if ((smoothed_median_throughput_cost_percent > 5.0f) || (median_gen2_tcp_percent > 
10.0f)) { - // compute the % overhead from msl waiting time and gc time for each of the samples - float percent_overhead[dynamic_heap_count_data_t::sample_size]; - for (int i = 0; i < dynamic_heap_count_data_t::sample_size; i++) - { - dynamic_heap_count_data_t::sample& sample = dynamic_heap_count_data.samples[i]; - uint64_t overhead_time = sample.soh_msl_wait_time + sample.uoh_msl_wait_time + sample.gc_elapsed_time; - percent_overhead[i] = overhead_time * 100.0f / sample.elapsed_between_gcs; - if (percent_overhead[i] < 0) - percent_overhead[i] = 0; - else if (percent_overhead[i] > 100) - percent_overhead[i] = 100; - dprintf (6666, ("sample %d: percent_overhead: %d%%", i, (int)percent_overhead[i])); - } - // compute the median of the percent overhead samples - #define compare_and_swap(i, j) \ - { \ - if (percent_overhead[i] < percent_overhead[j]) \ - { \ - float t = percent_overhead[i]; \ - percent_overhead[i] = percent_overhead[j]; \ - percent_overhead[j] = t; \ - } \ - } - compare_and_swap (1, 0); - compare_and_swap (2, 0); - compare_and_swap (2, 1); - #undef compare_and_swap - - // the middle element is the median overhead percentage - float median_percent_overhead = percent_overhead[1]; - - // apply exponential smoothing and use 1/3 for the smoothing factor - const float smoothing = 3; - float smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead; - if (smoothed_median_percent_overhead != 0.0f) - { - // average it with the previous value - smoothed_median_percent_overhead = median_percent_overhead / smoothing + (smoothed_median_percent_overhead / smoothing) * (smoothing - 1); + if (smoothed_median_throughput_cost_percent > 5.0f) + { + dprintf (6666, ("[CHP1] stcp %.3f > 5, %d + %d = %d", smoothed_median_throughput_cost_percent, n_heaps, step_up, (n_heaps + step_up))); } else { - // first time? 
initialize to the median - smoothed_median_percent_overhead = median_percent_overhead; + dprintf (6666, ("[CHP2] tcp %.3f > 10, %d + %d = %d", median_gen2_tcp_percent, n_heaps, step_up, (n_heaps + step_up))); } + new_n_heaps += step_up; + } + // if we can save at least 1% more in time than we spend in space, increase number of heaps + else if ((tcp_reduction_per_step_up - scp_increase_per_step_up) >= 1.0f) + { + dprintf (6666, ("[CHP3] % .3f - % .3f = % .3f, % d + % d = % d", + tcp_reduction_per_step_up, scp_increase_per_step_up, (tcp_reduction_per_step_up - scp_increase_per_step_up), + n_heaps, step_up, (n_heaps + step_up))); + new_n_heaps += step_up; + } + // if we can save at least 1% more in space than we spend in time, decrease number of heaps + else if ((smoothed_median_throughput_cost_percent < 1.0f) && + (median_gen2_tcp_percent < 5.0f) && + ((scp_decrease_per_step_down - tcp_increase_per_step_down) >= 1.0f)) + { + dprintf (6666, ("[CHP4] stcp %.3f tcp %.3f, %.3f - %.3f = %.3f, %d + %d = %d", + smoothed_median_throughput_cost_percent, median_gen2_tcp_percent, + scp_decrease_per_step_down, tcp_increase_per_step_down, (scp_decrease_per_step_down - tcp_increase_per_step_down), + n_heaps, step_up, (n_heaps + step_up))); + new_n_heaps -= step_down; + } - dprintf (6666, ("median overhead: %d%% smoothed median overhead: %d%%", (int)(median_percent_overhead*1000), (int)(smoothed_median_percent_overhead*1000))); - - // estimate the space cost of adding a heap as the min gen0 size - size_t heap_space_cost_per_heap = dd_min_size (hp0_dd0); - - // compute the % space cost of adding a heap - float percent_heap_space_cost_per_heap = heap_space_cost_per_heap * 100.0f / heap_size; - - // compute reasonable step sizes for the heap count + assert (new_n_heaps >= 1); + assert (new_n_heaps <= n_max_heaps); +#endif //STRESS_DYNAMIC_HEAP_COUNT - // on the way up, we essentially multiply the heap count by 1.5, so we go 1, 2, 3, 5, 8 ... 
- // we don't go all the way to the number of CPUs, but stay 1 or 2 short - int step_up = (n_heaps + 1) / 2; - int extra_heaps = 1 + (n_max_heaps >= 32); - step_up = min (step_up, n_max_heaps - extra_heaps - n_heaps); + // store data used for decision to emit in ETW event + dynamic_heap_count_data.median_throughput_cost_percent = median_throughput_cost_percent; + dynamic_heap_count_data.smoothed_median_throughput_cost_percent = smoothed_median_throughput_cost_percent; + dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; + dynamic_heap_count_data.tcp_reduction_per_step_up = tcp_reduction_per_step_up; + dynamic_heap_count_data.tcp_increase_per_step_down = tcp_increase_per_step_down; + dynamic_heap_count_data.scp_increase_per_step_up = scp_increase_per_step_up; + dynamic_heap_count_data.scp_decrease_per_step_down = scp_decrease_per_step_down; + + GCEventFireHeapCountTuning_V1 ( + (uint16_t)dynamic_heap_count_data.new_n_heaps, + (uint64_t)VolatileLoadWithoutBarrier (&settings.gc_index), + dynamic_heap_count_data.median_throughput_cost_percent, + dynamic_heap_count_data.smoothed_median_throughput_cost_percent, + dynamic_heap_count_data.tcp_reduction_per_step_up, + dynamic_heap_count_data.tcp_increase_per_step_down, + dynamic_heap_count_data.scp_increase_per_step_up, + dynamic_heap_count_data.scp_decrease_per_step_down + ); - // on the way down, we essentially divide the heap count by 1.5 - int step_down = (n_heaps + 1) / 3; + dynamic_heap_count_data.prev_num_completed_gcs = num_completed_gcs; - // estimate the potential time benefit of going up a step - float overhead_reduction_per_step_up = smoothed_median_percent_overhead * step_up / (n_heaps + step_up); + if (new_n_heaps != n_heaps) + { + dprintf (6666, ("should change! %d->%d", n_heaps, new_n_heaps)); + dynamic_heap_count_data.heap_count_to_change_to = new_n_heaps; + dynamic_heap_count_data.should_change_heap_count = true; + } - // estimate the potential time cost of going down a step - float overhead_increase_per_step_down = smoothed_median_percent_overhead * step_down / (n_heaps - step_down); + return new_n_heaps; +} - // estimate the potential space cost of going up a step - float space_cost_increase_per_step_up = percent_heap_space_cost_per_heap * step_up; +void gc_heap::check_heap_count () +{ + dynamic_heap_count_data.new_n_heaps = dynamic_heap_count_data.heap_count_to_change_to; - // estimate the potential space saving of going down a step - float space_cost_decrease_per_step_down = percent_heap_space_cost_per_heap * step_down; + assert (dynamic_heap_count_data.new_n_heaps != n_heaps); -#ifdef STRESS_DYNAMIC_HEAP_COUNT - // quick hack for initial testing - int new_n_heaps = (int)gc_rand::get_rand (n_max_heaps - 1) + 1; + if (dynamic_heap_count_data.new_n_heaps != n_heaps) + { + dprintf (9999, ("h0 suspending EE in check")); + // can't have threads allocating while we change the number of heaps + GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); + dprintf (9999, ("h0 suspended EE in check")); - // if we are adjusting down, make sure we adjust lower than the lowest uoh msl heap - if ((new_n_heaps < n_heaps) && (dynamic_heap_count_data.lowest_heap_with_msl_uoh != -1)) +#ifdef BACKGROUND_GC + if (gc_heap::background_running_p()) { - new_n_heaps = min (dynamic_heap_count_data.lowest_heap_with_msl_uoh, new_n_heaps); + // background GC is running - reset the new heap count + dynamic_heap_count_data.new_n_heaps = n_heaps; + dprintf (6666, ("can't change heap count! 
BGC in progress")); - // but not down to zero, obviously... - new_n_heaps = max (new_n_heaps, 1); - } -#else //STRESS_DYNAMIC_HEAP_COUNT - int new_n_heaps = n_heaps; - if (median_percent_overhead > 10.0f) - { - // ramp up more agressively - use as many heaps as it would take to bring - // the overhead down to 5% - new_n_heaps = (int)(n_heaps * (median_percent_overhead / 5.0)); - new_n_heaps = min (new_n_heaps, n_max_heaps - extra_heaps); - } - // if the median overhead is 10% or less, react slower - else if (smoothed_median_percent_overhead > 5.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in time than we spend in space, increase number of heaps - else if (overhead_reduction_per_step_up - space_cost_increase_per_step_up >= 1.0f) - { - new_n_heaps += step_up; - } - // if we can save at least 1% more in space than we spend in time, decrease number of heaps - else if (smoothed_median_percent_overhead < 1.0f && space_cost_decrease_per_step_down - overhead_increase_per_step_down >= 1.0f) - { - new_n_heaps -= step_down; + GCToEEInterface::RestartEE(TRUE); } +#endif //BACKGROUND_GC + } - dprintf (6666, ("or: %d, si: %d, sd: %d, oi: %d => %d -> %d", - (int)overhead_reduction_per_step_up, - (int)space_cost_increase_per_step_up, - (int)space_cost_decrease_per_step_down, - (int)overhead_increase_per_step_down, - n_heaps, - new_n_heaps)); - - assert (1 <= new_n_heaps); - assert (new_n_heaps <= n_max_heaps); -#endif //STRESS_DYNAMIC_HEAP_COUNT - - dynamic_heap_count_data.new_n_heaps = new_n_heaps; - - // store data used for decision to emit in ETW event - dynamic_heap_count_data.median_percent_overhead = median_percent_overhead; - dynamic_heap_count_data.smoothed_median_percent_overhead = smoothed_median_percent_overhead; - dynamic_heap_count_data.percent_heap_space_cost_per_heap = percent_heap_space_cost_per_heap; - dynamic_heap_count_data.overhead_reduction_per_step_up = overhead_reduction_per_step_up; - dynamic_heap_count_data.overhead_increase_per_step_down = overhead_increase_per_step_down; - dynamic_heap_count_data.space_cost_increase_per_step_up = space_cost_increase_per_step_up; - dynamic_heap_count_data.space_cost_decrease_per_step_down = space_cost_decrease_per_step_down; - - GCEventFireHeapCountTuning_V1( - (uint16_t)dynamic_heap_count_data.new_n_heaps, - (uint64_t)VolatileLoad(&settings.gc_index), - dynamic_heap_count_data.median_percent_overhead, - dynamic_heap_count_data.smoothed_median_percent_overhead, - dynamic_heap_count_data.overhead_reduction_per_step_up, - dynamic_heap_count_data.overhead_increase_per_step_down, - dynamic_heap_count_data.space_cost_increase_per_step_up, - dynamic_heap_count_data.space_cost_decrease_per_step_down - ); - - if (new_n_heaps != n_heaps) + if (dynamic_heap_count_data.new_n_heaps != n_heaps) + { + dprintf (6666, ("prep to change from %d to %d", n_heaps, dynamic_heap_count_data.new_n_heaps)); + if (!prepare_to_change_heap_count (dynamic_heap_count_data.new_n_heaps)) { - // can't have threads allocating while we change the number of heaps - GCToEEInterface::SuspendEE(SUSPEND_FOR_GC_PREP); - - if (gc_heap::background_running_p()) - { - // background GC is running - reset the new heap count - dynamic_heap_count_data.new_n_heaps = n_heaps; - - GCToEEInterface::RestartEE(TRUE); - } + // we don't have sufficient resources - reset the new heap count + dynamic_heap_count_data.new_n_heaps = n_heaps; } } if (dynamic_heap_count_data.new_n_heaps == n_heaps) { // heap count stays the same, no work to do - dprintf (6666, ("heap count stays 
the same, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, n_heaps)); + dynamic_heap_count_data.prev_num_completed_gcs = get_num_completed_gcs (); + dynamic_heap_count_data.should_change_heap_count = false; - // come back after 3 GCs to reconsider - prev_change_heap_count_gc_index = settings.gc_index; + dprintf (6666, ("heap count stays the same %d, no work to do, set prev completed to %Id", dynamic_heap_count_data.new_n_heaps, dynamic_heap_count_data.prev_num_completed_gcs)); return; } - if (GCScan::GetGcRuntimeStructuresValid()) + int new_n_heaps = dynamic_heap_count_data.new_n_heaps; + + assert (!(dynamic_heap_count_data.init_only_p)); + { + // At this point we are guaranteed to be able to change the heap count to the new one. + // Change the heap count for joins here because we will need to join new_n_heaps threads together. + dprintf (9999, ("changing join hp %d->%d", n_heaps, new_n_heaps)); + int max_threads_to_wake = max (n_heaps, new_n_heaps); + gc_t_join.update_n_threads (max_threads_to_wake); + // make sure the other gc threads cannot see this as a request to GC assert (dynamic_heap_count_data.new_n_heaps != n_heaps); + + if (n_heaps < new_n_heaps) + { + int saved_idle_thread_count = dynamic_heap_count_data.idle_thread_count; + Interlocked::ExchangeAdd (&dynamic_heap_count_data.idle_thread_count, (n_heaps - new_n_heaps)); + dprintf (9999, ("GC thread %d setting idle events for h%d-h%d, total idle %d -> %d", heap_number, n_heaps, (new_n_heaps - 1), + saved_idle_thread_count, VolatileLoadWithoutBarrier (&dynamic_heap_count_data.idle_thread_count))); + + for (int heap_idx = n_heaps; heap_idx < new_n_heaps; heap_idx++) + { + g_heaps[heap_idx]->gc_idle_thread_event.Set(); +#ifdef BACKGROUND_GC + g_heaps[heap_idx]->bgc_idle_thread_event.Set(); +#endif //BACKGROUND_GC + } + } + gc_start_event.Set(); } int old_n_heaps = n_heaps; + (dynamic_heap_count_data.heap_count_change_count)++; change_heap_count (dynamic_heap_count_data.new_n_heaps); GCToEEInterface::RestartEE(TRUE); - prev_change_heap_count_gc_index = settings.gc_index; + dprintf (9999, ("h0 restarted EE")); // we made changes to the heap count that will change the overhead, // so change the smoothed overhead to reflect that - int new_n_heaps = n_heaps; - dynamic_heap_count_data.smoothed_median_percent_overhead = dynamic_heap_count_data.smoothed_median_percent_overhead/new_n_heaps*old_n_heaps; + dynamic_heap_count_data.smoothed_median_throughput_cost_percent = dynamic_heap_count_data.smoothed_median_throughput_cost_percent / n_heaps * old_n_heaps; + + dprintf (6666, ("h0 finished changing, set should change to false!")); + dynamic_heap_count_data.should_change_heap_count = false; } bool gc_heap::prepare_to_change_heap_count (int new_n_heaps) { - dprintf (6666, ("trying to change heap count %d -> %d", n_heaps, new_n_heaps)); + dprintf (9999, ("trying to change heap count %d -> %d", n_heaps, new_n_heaps)); // use this variable for clarity - n_heaps will change during the transition int old_n_heaps = n_heaps; @@ -25371,6 +25644,17 @@ bool gc_heap::prepare_to_change_heap_count (int new_n_heaps) } } + // Before we look at whether we have sufficient regions we should return regions that should be deleted to free + // so we don't lose them when we decommission heaps. We could do this for only the heaps that we are about + // to decommission. But it's better to do this for all heaps because we don't need to worry about adding them to the + // heaps that remain (freeable uoh/soh regions), and we get rid of regions with the heap_segment_flags_uoh_delete flag + // because background_delay_delete_uoh_segments makes the assumption it can't be the start region. + for (int i = 0; i < old_n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + hp->delay_free_segments (); + } + // if we want to increase the number of heaps, we have to make sure we can give // each heap a region for each generation. If we cannot do that, we have to give up ptrdiff_t region_count_in_gen[total_generation_count]; @@ -25451,39 +25735,34 @@ bool gc_heap::prepare_to_change_heap_count (int new_n_heaps) bool gc_heap::change_heap_count (int new_n_heaps) { + dprintf (9999, ("BEG heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); + // use this variable for clarity - n_heaps will change during the transition int old_n_heaps = n_heaps; + bool init_only_p = dynamic_heap_count_data.init_only_p; - if (heap_number == 0) { - if (!prepare_to_change_heap_count (new_n_heaps)) - { - // we don't have sufficient resources - reset the new heap count - dynamic_heap_count_data.new_n_heaps = n_heaps; - } - } - - if (GCScan::GetGcRuntimeStructuresValid()) - { - // join for sufficient resources decision gc_t_join.join (this, gc_join_merge_temp_fl); if (gc_t_join.joined ()) { + // BGC is not running, we can safely change its join's heap count. +#ifdef BACKGROUND_GC + bgc_t_join.update_n_threads (new_n_heaps); +#endif //BACKGROUND_GC + + dynamic_heap_count_data.init_only_p = false; + dprintf (9999, ("in change h%d resetting gc_start, update bgc join to %d heaps", heap_number, new_n_heaps)); gc_start_event.Reset(); gc_t_join.restart (); } } - // gc_heap::n_heaps may have changed by now, compare to the snapshot *before* the join - if (dynamic_heap_count_data.new_n_heaps == old_n_heaps) - { - dprintf (6666, ("failed to change heap count, no work to do %d == %d", dynamic_heap_count_data.new_n_heaps, old_n_heaps)); - return false; - } + assert (dynamic_heap_count_data.new_n_heaps != old_n_heaps); + + dprintf (9999, ("Waiting h0 heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); if (heap_number == 0) { - // after having checked for sufficient resources, we are now committed to actually change the heap count dprintf (3, ("switching heap count from %d to %d heaps", old_n_heaps, new_n_heaps)); // spread finalization data out to heaps coming into service @@ -25504,17 +25783,23 @@ bool gc_heap::change_heap_count (int new_n_heaps) from_heap_number = (from_heap_number + 1) % old_n_heaps; } - // prepare for the switch by fixing the allocation contexts on the old heaps, + // prepare for the switch by fixing the allocation contexts on the old heaps, unify the gen0_bricks_cleared flag, // and setting the survived size for the existing regions to their allocated size + BOOL unified_gen0_bricks_cleared = TRUE; for (int i = 0; i < old_n_heaps; i++) { gc_heap* hp = g_heaps[i]; - if (GCScan::GetGcRuntimeStructuresValid()) + if (!init_only_p) { hp->fix_allocation_contexts (TRUE); } + if (unified_gen0_bricks_cleared && (hp->gen0_bricks_cleared == FALSE)) + { + unified_gen0_bricks_cleared = FALSE; + } + for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { generation* gen = hp->generation_of (gen_idx); @@ -25614,7 +25899,7 @@ bool gc_heap::change_heap_count (int new_n_heaps) hpd->free_regions[kind].transfer_regions(&hp->free_regions[kind]); } } - // update number of heaps + dprintf 
(9999, ("h%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); n_heaps = new_n_heaps; // even out the regions over the current number of heaps @@ -25625,6 +25910,8 @@ bool gc_heap::change_heap_count (int new_n_heaps) { gc_heap* hp = g_heaps[i]; + hp->gen0_bricks_cleared = unified_gen0_bricks_cleared; + // establish invariants regarding the ephemeral segment generation* gen0 = hp->generation_of (0); if ((hp->ephemeral_heap_segment == nullptr) || @@ -25653,7 +25940,9 @@ bool gc_heap::change_heap_count (int new_n_heaps) } } - if (GCScan::GetGcRuntimeStructuresValid()) + dprintf (3, ("individual heap%d changing %d->%d", heap_number, n_heaps, new_n_heaps)); + + if (!init_only_p) { // join for rethreading the free lists gc_t_join.join (this, gc_join_merge_temp_fl); @@ -25665,7 +25954,11 @@ bool gc_heap::change_heap_count (int new_n_heaps) // rethread the free lists for (int gen_idx = 0; gen_idx < total_generation_count; gen_idx++) { - rethread_fl_items (gen_idx); + if (heap_number < old_n_heaps) + { + dprintf (3, ("h%d calling per heap work!", heap_number)); + rethread_fl_items (gen_idx); + } // join for merging the free lists gc_t_join.join (this, gc_join_merge_temp_fl); @@ -25676,18 +25969,14 @@ bool gc_heap::change_heap_count (int new_n_heaps) gc_t_join.restart (); } } +#ifdef BACKGROUND_GC // there should be no items in the bgc_alloc_lock bgc_alloc_lock->check(); +#endif //BACKGROUND_GC } if (heap_number == 0) { - // udate the number of heaps in the joins - gc_t_join.update_n_threads(new_n_heaps); - #ifdef BACKGROUND_GC - bgc_t_join.update_n_threads(new_n_heaps); - #endif //BACKGROUND_GC - // compute the total budget per generation over the old heaps // and figure out what the new budget per heap is ptrdiff_t budget_per_heap[total_generation_count]; @@ -25747,21 +26036,50 @@ bool gc_heap::change_heap_count (int new_n_heaps) hp->decommission_heap(); } - if (GCScan::GetGcRuntimeStructuresValid()) + if (!init_only_p) { // make sure no allocation contexts point to idle heaps fix_allocation_contexts_heaps(); } - if (old_n_heaps < new_n_heaps) + dynamic_heap_count_data.last_n_heaps = old_n_heaps; + } + + // join the last time to change the heap count again if needed. 
+ if (new_n_heaps < old_n_heaps) + { + gc_t_join.join (this, gc_join_merge_temp_fl); + if (gc_t_join.joined ()) { - // wake up threads for the new heaps - gc_idle_thread_event.Set(); + dprintf (9999, ("now changing the join heap count to the smaller one %d", new_n_heaps)); + gc_t_join.update_n_threads (new_n_heaps); + + gc_t_join.restart (); } } return true; } + +size_t gc_heap::get_msl_wait_time() +{ + assert (dynamic_adaptation_mode == dynamic_adaptation_to_application_sizes); + + size_t msl_wait_since_pause = 0; + + for (int i = 0; i < n_heaps; i++) + { + gc_heap* hp = g_heaps[i]; + + msl_wait_since_pause += hp->more_space_lock_soh.msl_wait_time; + hp->more_space_lock_soh.msl_wait_time = 0; + + msl_wait_since_pause += hp->more_space_lock_uoh.msl_wait_time; + hp->more_space_lock_uoh.msl_wait_time = 0; + } + + return msl_wait_since_pause; +} #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -32805,17 +33123,17 @@ void gc_heap::plan_phase (int condemned_gen_number) } else { - dprintf (2, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd", + dprintf (1, ("gen2 didn't grow (end seg alloc: %zd, , condemned alloc: %zd, gen1 c alloc: %zd", end_seg_allocated, condemned_allocated, generation_condemned_allocated (generation_of (max_generation - 1)))); } - dprintf (1, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", + dprintf (2, ("older gen's free alloc: %zd->%zd, seg alloc: %zd->%zd, condemned alloc: %zd->%zd", r_older_gen_free_list_allocated, generation_free_list_allocated (older_gen), r_older_gen_end_seg_allocated, generation_end_seg_allocated (older_gen), r_older_gen_condemned_allocated, generation_condemned_allocated (older_gen))); - dprintf (1, ("this GC did %zd free list alloc(%zd bytes free space rejected)", + dprintf (2, ("this GC did %zd free list alloc(%zd bytes free space rejected)", free_list_allocated, rejected_free_space)); maxgen_size_increase* maxgen_size_info = &(get_gc_data_per_heap()->maxgen_size_info); @@ -38908,9 +39226,9 @@ void gc_heap::bgc_thread_function() { // this is the case where we have more background GC threads than heaps // - wait until we're told to continue... 
- dprintf (3, ("BGC thread %d idle", heap_number)); - gc_idle_thread_event.Wait(INFINITE, FALSE); - dprintf (3, ("BGC thread %d waking from idle", heap_number)); + dprintf (9999, ("BGC thread %d idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); + bgc_idle_thread_event.Wait(INFINITE, FALSE); + dprintf (9999, ("BGC thread %d waking from idle (%d heaps) (gc%Id)", heap_number, n_heaps, VolatileLoadWithoutBarrier (&settings.gc_index))); continue; } #endif //DYNAMIC_HEAP_COUNT @@ -38982,7 +39300,7 @@ void gc_heap::bgc_thread_function() dprintf (SPINLOCK_LOG, ("bgc Lgc")); leave_spin_lock (&gc_lock); #ifdef MULTIPLE_HEAPS - dprintf(1, ("End of BGC - starting all BGC threads")); + dprintf(1, ("End of BGC")); bgc_t_join.restart(); #endif //MULTIPLE_HEAPS } @@ -42859,6 +43177,9 @@ bool gc_heap::init_dynamic_data() { process_start_time = now; smoothed_desired_total[0] = dynamic_data_of (0)->min_size * n_heaps; +#ifdef DYNAMIC_HEAP_COUNT + last_suspended_end_time = now; +#endif //DYNAMIC_HEAP_COUNT #ifdef HEAP_BALANCE_INSTRUMENTATION last_gc_end_time_us = now; dprintf (HEAP_BALANCE_LOG, ("qpf=%zd, start: %zd(%d)", qpf, start_raw_ts, now)); @@ -47957,6 +48278,7 @@ HRESULT GCHeap::Initialize() uint32_t nhp = 1; uint32_t nhp_from_config = 0; + uint32_t max_nhp_from_config = (uint32_t)GCConfig::GetMaxHeapCount(); #ifndef MULTIPLE_HEAPS GCConfig::SetServerGC(false); @@ -48151,6 +48473,10 @@ HRESULT GCHeap::Initialize() #ifdef MULTIPLE_HEAPS assert (nhp <= g_num_processors); + if (max_nhp_from_config) + { + nhp = min (nhp, max_nhp_from_config); + } gc_heap::n_max_heaps = nhp; gc_heap::n_heaps = nhp; hr = gc_heap::initialize_gc (seg_size, large_seg_size, pin_seg_size, nhp); @@ -48301,9 +48627,32 @@ HRESULT GCHeap::Initialize() { // start with only 1 heap gc_heap::smoothed_desired_total[0] /= gc_heap::n_heaps; - gc_heap::g_heaps[0]->change_heap_count (1); + int initial_n_heaps = 1; + dprintf (9999, ("gc_heap::n_heaps is %d, initial %d", gc_heap::n_heaps, initial_n_heaps)); + + { + if (!gc_heap::prepare_to_change_heap_count (initial_n_heaps)) + { + // we don't have sufficient resources. + return E_FAIL; + } + + gc_heap::dynamic_heap_count_data.new_n_heaps = initial_n_heaps; + gc_heap::dynamic_heap_count_data.idle_thread_count = 0; + gc_heap::dynamic_heap_count_data.init_only_p = true; + + int max_threads_to_wake = max (gc_heap::n_heaps, initial_n_heaps); + gc_t_join.update_n_threads (max_threads_to_wake); + gc_heap::gc_start_event.Set (); + } + + gc_heap::g_heaps[0]->change_heap_count (initial_n_heaps); + gc_heap::gc_start_event.Reset (); + + // This needs to be different from our initial heap count so we can make sure we wait for + // the idle threads correctly in gc_thread_function. + gc_heap::dynamic_heap_count_data.last_n_heaps = 0; } - gc_heap::dynamic_heap_count_data.new_n_heaps = gc_heap::n_heaps; #endif //DYNAMIC_HEAP_COUNT GCScan::GcRuntimeStructuresValid (TRUE); @@ -49875,10 +50224,16 @@ void gc_heap::do_post_gc() } #endif //BGC_SERVO_TUNING +#ifdef BACKGROUND_GC + const char* str_gc_type = (settings.concurrent ? "BGC" : (gc_heap::background_running_p () ? 
"FGC" : "NGC")); +#else + const char* str_gc_type = "NGC"; +#endif //BACKGROUND_GC + dprintf (1, (ThreadStressLog::gcDetailedEndMsg(), - VolatileLoad(&settings.gc_index), - dd_collection_count(hp->dynamic_data_of(0)), - (size_t)(GetHighPrecisionTimeStamp() / 1000), + VolatileLoad (&settings.gc_index), + dd_collection_count (hp->dynamic_data_of (0)), + (size_t)(GetHighPrecisionTimeStamp () / 1000), settings.condemned_generation, (settings.concurrent ? "BGC" : (gc_heap::background_running_p() ? "FGC" : "NGC")), (settings.compaction ? "C" : "S"), diff --git a/src/coreclr/gc/gcconfig.h b/src/coreclr/gc/gcconfig.h index 72786778d5a97..aeded6bc97f17 100644 --- a/src/coreclr/gc/gcconfig.h +++ b/src/coreclr/gc/gcconfig.h @@ -83,6 +83,7 @@ class GCConfigStringHolder INT_CONFIG (BGCSpinCount, "BGCSpinCount", NULL, 140, "Specifies the bgc spin count") \ INT_CONFIG (BGCSpin, "BGCSpin", NULL, 2, "Specifies the bgc spin time") \ INT_CONFIG (HeapCount, "GCHeapCount", "System.GC.HeapCount", 0, "Specifies the number of server GC heaps") \ + INT_CONFIG (MaxHeapCount, "GCMaxHeapCount", "System.GC.MaxHeapCount", 0, "Specifies the max number of server GC heaps to adjust to") \ INT_CONFIG (Gen0Size, "GCgen0size", NULL, 0, "Specifies the smallest gen0 budget") \ INT_CONFIG (SegmentSize, "GCSegmentSize", NULL, 0, "Specifies the managed heap segment size") \ INT_CONFIG (LatencyMode, "GCLatencyMode", NULL, -1, "Specifies the GC latency mode - batch, interactive or low latency (note that the same " \ diff --git a/src/coreclr/gc/gcpriv.h b/src/coreclr/gc/gcpriv.h index 1a73add83b429..cce6c5ee28adf 100644 --- a/src/coreclr/gc/gcpriv.h +++ b/src/coreclr/gc/gcpriv.h @@ -402,8 +402,6 @@ struct GCDebugSpinLock { #if defined(DYNAMIC_HEAP_COUNT) // time in microseconds we wait for the more space lock uint64_t msl_wait_time; - // number of times we wait for the more space lock - uint64_t msl_wait_count; #endif //DYNAMIC_HEAP_COUNT GCDebugSpinLock() @@ -415,7 +413,7 @@ struct GCDebugSpinLock { , num_switch_thread(0), num_wait_longer(0), num_switch_thread_w(0), num_disable_preemptive_w(0) #endif #if defined(DYNAMIC_HEAP_COUNT) - , msl_wait_time(0), msl_wait_count(0) + , msl_wait_time(0) #endif //DYNAMIC_HEAP_COUNT { } @@ -1148,15 +1146,12 @@ class dynamic_data // // The following 3 fields are updated at the beginning of each GC, if that GC condemns this generation. // - // The number of GC that condemned this generation. The only difference between this - // and collection_count is just that collection_count is maintained for all physical generations - // (currently there are 5) whereas this is only updated for logical generations (there are 3). - size_t gc_clock; - uint64_t time_clock; //time when this gc started + size_t gc_clock; // the gc index + uint64_t time_clock; // time when this gc started uint64_t previous_time_clock; // time when previous gc started // Updated at the end of a GC, if that GC condemns this generation. - size_t gc_elapsed_time; // Time it took for the gc to complete + size_t gc_elapsed_time; // time it took for the gc to complete // // The following fields (and fields in sdata) are initialized during GC init time and do not change. 
@@ -1495,6 +1490,8 @@ class mark_queue_t void verify_empty(); }; +float median_of_3 (float a, float b, float c); + //class definition of the internal class class gc_heap { @@ -2422,6 +2419,7 @@ class gc_heap #ifndef USE_REGIONS PER_HEAP_METHOD void rearrange_heap_segments(BOOL compacting); #endif //!USE_REGIONS + PER_HEAP_METHOD void delay_free_segments(); PER_HEAP_ISOLATED_METHOD void distribute_free_regions(); #ifdef BACKGROUND_GC PER_HEAP_ISOLATED_METHOD void reset_write_watch_for_gc_heap(void* base_address, size_t region_size); @@ -2597,11 +2595,17 @@ class gc_heap // re-initialize a heap in preparation to putting it back into service PER_HEAP_METHOD void recommission_heap(); + PER_HEAP_ISOLATED_METHOD size_t get_num_completed_gcs(); + + PER_HEAP_ISOLATED_METHOD int calculate_new_heap_count(); + // check if we should change the heap count PER_HEAP_METHOD void check_heap_count(); - PER_HEAP_METHOD bool prepare_to_change_heap_count (int new_n_heaps); + PER_HEAP_ISOLATED_METHOD bool prepare_to_change_heap_count (int new_n_heaps); PER_HEAP_METHOD bool change_heap_count (int new_n_heaps); + + PER_HEAP_ISOLATED_METHOD size_t get_msl_wait_time(); #endif //DYNAMIC_HEAP_COUNT #endif //USE_REGIONS @@ -3778,6 +3782,13 @@ class gc_heap PER_HEAP_FIELD_MAINTAINED mark* loh_pinned_queue; #endif //FEATURE_LOH_COMPACTION +#ifdef DYNAMIC_HEAP_COUNT + PER_HEAP_FIELD_MAINTAINED GCEvent gc_idle_thread_event; +#ifdef BACKGROUND_GC + PER_HEAP_FIELD_MAINTAINED GCEvent bgc_idle_thread_event; +#endif //BACKGROUND_GC +#endif //DYNAMIC_HEAP_COUNT + /******************************************/ // PER_HEAP_FIELD_MAINTAINED_ALLOC fields // /******************************************/ @@ -4084,7 +4095,6 @@ class gc_heap // These 2 fields' values do not change but are set/unset per GC PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent gc_start_event; PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent ee_suspend_event; - PER_HEAP_ISOLATED_FIELD_SINGLE_GC GCEvent gc_idle_thread_event; // Also updated on the heap#0 GC thread because that's where we are actually doing the decommit. PER_HEAP_ISOLATED_FIELD_SINGLE_GC BOOL gradual_decommit_in_progress_p; @@ -4163,6 +4173,10 @@ class gc_heap PER_HEAP_ISOLATED_FIELD_SINGLE_GC uint8_t* gc_high; // high end of the highest region being condemned #endif //USE_REGIONS +#ifdef STRESS_DYNAMIC_HEAP_COUNT + PER_HEAP_ISOLATED_FIELD_SINGLE_GC int heaps_in_this_gc; +#endif //STRESS_DYNAMIC_HEAP_COUNT + /**************************************************/ // PER_HEAP_ISOLATED_FIELD_SINGLE_GC_ALLOC fields // /**************************************************/ @@ -4261,37 +4275,65 @@ class gc_heap #endif //USE_REGIONS #ifdef DYNAMIC_HEAP_COUNT + // Sample collection - + // + // For every GC, we collect the msl wait time + GC pause duration info and use both to calculate the + // throughput cost percentage. We will also be using the wait time and the GC pause duration separately + // for other purposes in the future. + // + // For all gen2 GCs we also keep a separate array currently just for the GC cost. This serves as a backstop + // to smooth out the situation when we rarely pick the gen2 GCs in the first array. 
struct dynamic_heap_count_data_t { static const int sample_size = 3; struct sample { - uint64_t elapsed_between_gcs; // time between gcs in microseconds - uint64_t gc_elapsed_time; // time the gc took - uint64_t soh_msl_wait_time; // time the allocator spent waiting for the soh msl lock - uint64_t uoh_msl_wait_time; // time the allocator spent waiting for the uoh msl lock - size_t allocating_thread_count;// number of allocating threads - size_t heap_size; + uint64_t elapsed_between_gcs; // time between gcs in microseconds (this should really be between_pauses) + uint64_t gc_pause_time; // pause time for this GC + uint64_t msl_wait_time; }; - unsigned sample_index; + uint32_t sample_index; sample samples[sample_size]; + size_t prev_num_completed_gcs; + + uint32_t gen2_sample_index; + // This is (gc_elapsed_time / time inbetween this and the last gen2 GC) + float gen2_gc_percents[sample_size]; - float median_percent_overhead; // estimated overhead of allocator + gc - float smoothed_median_percent_overhead; // exponentially smoothed version - float percent_heap_space_cost_per_heap; // percent space cost of adding a heap - float overhead_reduction_per_step_up; // percentage effect on overhead of increasing heap count - float overhead_increase_per_step_down; // percentage effect on overhead of decreasing heap count - float space_cost_increase_per_step_up; // percentage effect on space of increasing heap count - float space_cost_decrease_per_step_down;// percentage effect on space of decreasing heap count + float median_throughput_cost_percent; // estimated overhead of allocator + gc + float smoothed_median_throughput_cost_percent; // exponentially smoothed version + float percent_heap_space_cost_per_heap; // percent space cost of adding a heap + float tcp_reduction_per_step_up; // throughput cost percent effect of increasing heap count + float tcp_increase_per_step_down; // throughput cost percent effect of decreasing heap count + float scp_increase_per_step_up; // space cost percent effect of increasing heap count + float scp_decrease_per_step_down; // space cost percent effect of decreasing heap count int new_n_heaps; + // the heap count we changed from + int last_n_heaps; + // don't start a GC till we see (n_max_heaps - new_n_heaps) number of threads idling + VOLATILE(int32_t) idle_thread_count; + bool init_only_p; + + bool should_change_heap_count; + int heap_count_to_change_to; + int heap_count_change_count; #ifdef STRESS_DYNAMIC_HEAP_COUNT int lowest_heap_with_msl_uoh; #endif //STRESS_DYNAMIC_HEAP_COUNT + + float get_median_gen2_gc_percent() + { + return median_of_3 (gen2_gc_percents[0], gen2_gc_percents[1], gen2_gc_percents[2]); + } }; PER_HEAP_ISOLATED_FIELD_MAINTAINED dynamic_heap_count_data_t dynamic_heap_count_data; + PER_HEAP_ISOLATED_FIELD_MAINTAINED uint64_t last_suspended_end_time; + // If the last full GC is blocking, this is that GC's index; for BGC, this is the settings.gc_index + // when the BGC ended. 
+ PER_HEAP_ISOLATED_FIELD_MAINTAINED size_t gc_index_full_gc_end; #endif //DYNAMIC_HEAP_COUNT /****************************************************/ @@ -4867,7 +4909,6 @@ uint64_t& dd_previous_time_clock (dynamic_data* inst) return inst->previous_time_clock; } - inline size_t& dd_gc_clock_interval (dynamic_data* inst) { From 18bfe59bc967529e633498ad20c9710a5e4752d0 Mon Sep 17 00:00:00 2001 From: "dotnet-maestro[bot]" <42748379+dotnet-maestro[bot]@users.noreply.github.com> Date: Wed, 20 Sep 2023 09:26:56 -0700 Subject: [PATCH 6/9] Update dependencies from https://github.com/dotnet/emsdk build 20230919.4 (#92322) Microsoft.NET.Workload.Emscripten.Current.Manifest-8.0.100.Transport From Version 8.0.0-rc.2.23469.1 -> To Version 8.0.0-rc.2.23469.4 Co-authored-by: dotnet-maestro[bot] --- eng/Version.Details.xml | 4 ++-- eng/Versions.props | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 489044e0d1788..bb8f1d633fbb0 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -90,9 +90,9 @@ 89be445dd4936157533ad96bafb95f701430653a - + https://github.com/dotnet/emsdk - ca3e8e40f417e421141638c66dd301395afee66d + ea0e8e8214e9acc0cba7e78a836ed6656f788d11 diff --git a/eng/Versions.props b/eng/Versions.props index d56eeab17ef17..61344b9a91613 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -240,7 +240,7 @@ Note: when the name is updated, make sure to update dependency name in eng/pipelines/common/xplat-setup.yml like - DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-8_0_100_Transport --> - 8.0.0-rc.2.23469.1 + 8.0.0-rc.2.23469.4 $(MicrosoftNETWorkloadEmscriptenCurrentManifest80100TransportVersion) 1.1.87-gba258badda From 58eb21d3fede84306fcf3fa2ac4ff0f4cae86a5b Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Wed, 20 Sep 2023 18:53:04 -0400 Subject: [PATCH 7/9] [release/8.0-rc2][wasm] Add a dependency on dotnet/installer to get the latest (#92353) * [wasm] Add a dependency on dotnet/installer to get the latest And update version from `.NET 8.0.1xx SDK` channel. (cherry picked from commit c821c361321d710aae7a24a6293a876ddcbf374e) * [wasm] WBT: Use --skip-sign-check when installing workloads * [wasm] WBT: Update blazor tests to track changed path for Pages .. from `Pages/` to `Components/Pages/`. 
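For reference, the path change these tests track comes from the .NET 8 Blazor template layout, where razor pages moved from Pages/ to Components/Pages/. A minimal sketch of the adjusted path construction (projectDir is a hypothetical project root used only for this example):

    using System;
    using System.IO;

    string projectDir = "/tmp/BlazorApp"; // hypothetical project root

    // Pre-.NET 8 template layout:
    string oldPath = Path.Combine(projectDir, "Pages", "Counter.razor");
    // .NET 8 template layout, as used by the updated tests:
    string newPath = Path.Combine(projectDir, "Components", "Pages", "Counter.razor");
    Console.WriteLine($"{oldPath} -> {newPath}");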
--- eng/Version.Details.xml | 4 ++++ eng/Versions.props | 5 +++-- src/mono/wasm/README.md | 9 +++++++++ src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs | 4 ++-- .../Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs | 2 +- .../WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs | 2 +- 6 files changed, 20 insertions(+), 6 deletions(-) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index bb8f1d633fbb0..9bedd07535a17 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -398,5 +398,9 @@ https://github.com/NuGet/NuGet.Client 8fef55f5a55a3b4f2c96cd1a9b5ddc51d4b927f8 + + https://github.com/dotnet/installer + dbeae1ac71d95355452952059f35960991cb3fd2 + diff --git a/eng/Versions.props b/eng/Versions.props index 61344b9a91613..abf8964b954d1 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -205,8 +205,6 @@ 2.46.3 2.45.0 2.45.0 - - 8.0.100-rc.1.23415.5 1.1.2-beta1.23323.1 8.0.0-preview-20230918.1 @@ -257,5 +255,8 @@ 3.1.7 1.0.406601 + + 8.0.100-rc.2.23470.7 + $(MicrosoftDotnetSdkInternalVersion) diff --git a/src/mono/wasm/README.md b/src/mono/wasm/README.md index 34c18bc871146..cdace086f5603 100644 --- a/src/mono/wasm/README.md +++ b/src/mono/wasm/README.md @@ -350,3 +350,12 @@ npm update --lockfile-version=1 | Multi-thread | linux: build only | none | * `high resource aot` runs a few specific library tests with AOT, that require more memory to AOT. + + +# Perf pipeline + +TBD + +## Updates needed + +- when the base OS is upgraded, check if the version of node installed in the `eng/pipelines/coreclr/templates/run-performance-job.yml` needs an upgrade too. diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs index 10717b334174b..b006bf7a93e5c 100644 --- a/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs +++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/MiscTests3.cs @@ -63,7 +63,7 @@ public static class MyDllImports public static extern int cpp_add(int a, int b); }}"; - File.WriteAllText(Path.Combine(_projectDir!, "Pages", "MyDllImport.cs"), myDllImportCs); + File.WriteAllText(Path.Combine(_projectDir!, "Components", "Pages", "MyDllImport.cs"), myDllImportCs); AddItemsPropertiesToProject(projectFile, extraItems: @""); BlazorAddRazorButton("cpp_add", """ @@ -144,7 +144,7 @@ public void BugRegression_60479_WithRazorClassLib() Assert.Contains(razorClassLibraryFileName, lazyVal.EnumerateObject().Select(jp => jp.Name)); } - private void BlazorAddRazorButton(string buttonText, string customCode, string methodName = "test", string razorPage = "Pages/Counter.razor") + private void BlazorAddRazorButton(string buttonText, string customCode, string methodName = "test", string razorPage = "Components/Pages/Counter.razor") { string additionalCode = $$"""
            <p role="{{methodName}}">Output: @outputText</p>
diff --git a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs index cbe3e461ec9cd..7ca663dce5372 100644 --- a/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs +++ b/src/mono/wasm/Wasm.Build.Tests/Blazor/WorkloadRequiredTests.cs @@ -84,7 +84,7 @@ public async Task WorkloadNotRequiredForInvariantGlobalization(string config, bo if (invariant) AddItemsPropertiesToProject(projectFile, extraProperties: "true"); - string counterPath = Path.Combine(Path.GetDirectoryName(projectFile)!, "Pages", "Counter.razor"); + string counterPath = Path.Combine(Path.GetDirectoryName(projectFile)!, "Components", "Pages", "Counter.razor"); string allText = File.ReadAllText(counterPath); string ccText = "currentCount++;"; if (allText.IndexOf(ccText) < 0) diff --git a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs index e520057d5b3bd..224527142fa89 100644 --- a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs +++ b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs @@ -215,7 +215,7 @@ private bool InstallPacks(InstallWorkloadRequest req, string nugetConfigContents (int exitCode, string output) = Utils.TryRunProcess( Log, Path.Combine(req.TargetPath, "dotnet"), - $"workload install --skip-manifest-update --configfile \"{nugetConfigPath}\" --temp-dir \"{_tempDir}/workload-install-temp\" {req.WorkloadId}", + $"workload install --skip-manifest-update --skip-sign-check --configfile \"{nugetConfigPath}\" --temp-dir \"{_tempDir}/workload-install-temp\" {req.WorkloadId}", workingDir: _tempDir, envVars: new Dictionary () { ["NUGET_PACKAGES"] = _nugetCachePath From 1361b189a45e312958e3f8509f053150300b494f Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Wed, 20 Sep 2023 19:15:45 -0400 Subject: [PATCH 8/9] Update dependencies from dotnet/emsdk Picking the update from https://github.com/dotnet/runtime/pull/92320 to here, to avoid the overlap. --- eng/Version.Details.xml | 4 ++-- eng/Versions.props | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/eng/Version.Details.xml b/eng/Version.Details.xml index 9bedd07535a17..f33ad2fde3408 100644 --- a/eng/Version.Details.xml +++ b/eng/Version.Details.xml @@ -90,9 +90,9 @@ 89be445dd4936157533ad96bafb95f701430653a
- + https://github.com/dotnet/emsdk - ea0e8e8214e9acc0cba7e78a836ed6656f788d11 + 446eeb331fcbf2f48c14a377601a8ab950ec942e diff --git a/eng/Versions.props b/eng/Versions.props index abf8964b954d1..a8dc199cc94ed 100644 --- a/eng/Versions.props +++ b/eng/Versions.props @@ -238,7 +238,7 @@ Note: when the name is updated, make sure to update dependency name in eng/pipelines/common/xplat-setup.yml like - DarcDependenciesChanged.Microsoft_NET_Workload_Emscripten_Current_Manifest-8_0_100_Transport --> - 8.0.0-rc.2.23469.4 + 8.0.0-rtm.23470.1 $(MicrosoftNETWorkloadEmscriptenCurrentManifest80100TransportVersion) 1.1.87-gba258badda From 02d408eeb7e74d6cbc58807eb5c418180e6b6909 Mon Sep 17 00:00:00 2001 From: Ankit Jain Date: Wed, 20 Sep 2023 16:41:07 -0400 Subject: [PATCH 9/9] [wasm] InstallWorkloadFromArtifacts: correctly handle rtm versions In case of a version like `8.0.100-rtm.23470.1`, we incorrectly extracted `-rtm.23470` instead of `-rtm`, which resulted in trying to install package named `Microsoft.NET.Workload.Emscripten.Current.Manifest-8.0.100-rtm.23470` instead of `Microsoft.NET.Workload.Emscripten.Current.Manifest-8.0.100-rtm`. --- src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs index 224527142fa89..62817719af0a1 100644 --- a/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs +++ b/src/tasks/WorkloadBuildTasks/InstallWorkloadFromArtifacts.cs @@ -49,7 +49,7 @@ public partial class InstallWorkloadFromArtifacts : Task private string _tempDir = string.Empty; private string _nugetCachePath = string.Empty; - [GeneratedRegex(@"^\d+\.\d+\.\d+(-[A-z]*\.*\d*)?")] + [GeneratedRegex(@"^\d+\.\d+\.\d+(-rtm|-[A-z]*\.*\d*)?")] private static partial Regex bandVersionRegex(); public override bool Execute()
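As a standalone illustration of the band-version fix above (the two patterns are copied verbatim from the diff; the sample versions come from the commit message):

    using System;
    using System.Text.RegularExpressions;

    // Old pattern: lets [A-z]*\.*\d* run past the prerelease label.
    var oldBand = new Regex(@"^\d+\.\d+\.\d+(-[A-z]*\.*\d*)?");
    // New pattern: matches a bare -rtm label before falling back.
    var newBand = new Regex(@"^\d+\.\d+\.\d+(-rtm|-[A-z]*\.*\d*)?");

    Console.WriteLine(oldBand.Match("8.0.100-rtm.23470.1").Value);  // 8.0.100-rtm.23470 (wrong band)
    Console.WriteLine(newBand.Match("8.0.100-rtm.23470.1").Value);  // 8.0.100-rtm
    Console.WriteLine(newBand.Match("8.0.100-rc.2.23470.7").Value); // 8.0.100-rc.2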