From a8c71f0a15983a17ed07490feb4a195ababff439 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 09:09:09 +1000
Subject: [PATCH 01/20] Inline and remove
 `submit_pre_codegened_module_to_llvm`.

It has a single callsite, and provides little value.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 13 -------------
 compiler/rustc_codegen_ssa/src/base.rs       | 11 +++++++++--
 2 files changed, 9 insertions(+), 15 deletions(-)
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 682418f3a35de..df68cf1e28bde 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1962,19 +1962,6 @@ impl<B: ExtraBackendMethods> OngoingCodegen<B> {
         )
     }
 
-    pub fn submit_pre_codegened_module_to_llvm(
-        &self,
-        tcx: TyCtxt<'_>,
-        module: ModuleCodegen<B::Module>,
-    ) {
-        self.wait_for_signal_to_codegen_item();
-        self.check_for_errors(tcx.sess);
-
-        // These are generally cheap and won't throw off scheduling.
-        let cost = 0;
-        submit_codegened_module_to_llvm(&self.backend, &self.coordinator.sender, module, cost);
-    }
-
     pub fn codegen_finished(&self, tcx: TyCtxt<'_>) {
         self.wait_for_signal_to_codegen_item();
         self.check_for_errors(tcx.sess);
diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 79c284ecfbfa0..8135b84216247 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -664,9 +664,16 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
             )
         });
 
-        ongoing_codegen.submit_pre_codegened_module_to_llvm(
-            tcx,
+        ongoing_codegen.wait_for_signal_to_codegen_item();
+        ongoing_codegen.check_for_errors(tcx.sess);
+
+        // These modules are generally cheap and won't throw off scheduling.
+        let cost = 0;
+        submit_codegened_module_to_llvm(
+            &backend,
+            &ongoing_codegen.coordinator.sender,
             ModuleCodegen { name: llmod_id, module_llvm, kind: ModuleKind::Allocator },
+            cost,
         );
     }
 

From 4f598b852cf536a96234a1043a0af9f72d7e37e6 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 11:19:52 +1000
Subject: [PATCH 02/20] Add comments to `WorkItemResult`.

And rename the `Compiled` variant as `Finished`, because that name makes
it clearer there is nothing left to do, contrasting nicely with the
`Needs*` variants.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index df68cf1e28bde..2ef0a7c1eac3a 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -751,9 +751,19 @@ impl<B: WriteBackendMethods> WorkItem<B> {
 
 /// A result produced by the backend.
 pub(crate) enum WorkItemResult<B: WriteBackendMethods> {
-    Compiled(CompiledModule),
+    /// The backend has finished compiling a CGU, nothing more required.
+    Finished(CompiledModule),
+
+    /// The backend has finished compiling a CGU, which now needs linking
+    /// because `-Zcombine-cgu` was specified.
     NeedsLink(ModuleCodegen<B::Module>),
+
+    /// The backend has finished compiling a CGU, which now needs to go through
+    /// fat LTO.
     NeedsFatLTO(FatLTOInput<B>),
+
+    /// The backend has finished compiling a CGU, which now needs to go through
+    /// thin LTO.
     NeedsThinLTO(String, B::ThinBuffer),
 }
 
@@ -906,7 +916,7 @@ fn execute_copy_from_cache_work_item<B: ExtraBackendMethods>(
             load_from_incr_comp_dir(dwarf_obj_out, &saved_dwarf_object_file)
         });
 
-    WorkItemResult::Compiled(CompiledModule {
+    WorkItemResult::Finished(CompiledModule {
         name: module.name,
         kind: ModuleKind::Regular,
         object,
@@ -936,7 +946,7 @@ fn finish_intra_module_work<B: ExtraBackendMethods>(
         || module.kind == ModuleKind::Allocator
     {
         let module = unsafe { B::codegen(cgcx, &diag_handler, module, module_config)? };
-        Ok(WorkItemResult::Compiled(module))
+        Ok(WorkItemResult::Finished(module))
     } else {
         Ok(WorkItemResult::NeedsLink(module))
     }
@@ -1522,7 +1532,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     free_worker(worker_id);
 
                     match result {
-                        Ok(WorkItemResult::Compiled(compiled_module)) => {
+                        Ok(WorkItemResult::Finished(compiled_module)) => {
                             match compiled_module.kind {
                                 ModuleKind::Regular => {
                                     compiled_modules.push(compiled_module);

From fd017d3c170796790755441dbb95020b13fd5549 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 11:48:11 +1000
Subject: [PATCH 03/20] Add some assertions.

- Thin and fat LTO can't happen together.
- `NeedsLink` and (non-allocator) `Compiled` work item results can't
  happen together.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 2ef0a7c1eac3a..540685bd117a4 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1535,6 +1535,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         Ok(WorkItemResult::Finished(compiled_module)) => {
                             match compiled_module.kind {
                                 ModuleKind::Regular => {
+                                    assert!(needs_link.is_empty());
                                     compiled_modules.push(compiled_module);
                                 }
                                 ModuleKind::Allocator => {
@@ -1545,14 +1546,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             }
                         }
                         Ok(WorkItemResult::NeedsLink(module)) => {
+                            assert!(compiled_modules.is_empty());
                             needs_link.push(module);
                         }
                         Ok(WorkItemResult::NeedsFatLTO(fat_lto_input)) => {
                             assert!(!started_lto);
+                            assert!(needs_thin_lto.is_empty());
                             needs_fat_lto.push(fat_lto_input);
                         }
                         Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => {
                             assert!(!started_lto);
+                            assert!(needs_fat_lto.is_empty());
                             needs_thin_lto.push((name, thin_buffer));
                         }
                         Err(Some(WorkerFatalError)) => {

From 5bef04ed38ee916451d31cce4e4000a5b3c20f22 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 12:09:46 +1000
Subject: [PATCH 04/20] Rename things related to the main thread's operations.

It took me some time to understand how the main thread can lend a
jobserver token to an LLVM thread. This commit renames a couple of
things to make it clearer.

- Rename the `LLVMing` variant as `Lending`, because that is a clearer
  description of what is happening.
- Rename `running` as `running_with_own_token`, which makes it clearer
  that there might be one additional LLVM thread running (with a loaned
  token). Also add a comment to its definition.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 76 ++++++++++++--------
 1 file changed, 46 insertions(+), 30 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 540685bd117a4..27acd7ad3c299 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -998,9 +998,14 @@ struct Diagnostic {
 
 #[derive(PartialEq, Clone, Copy, Debug)]
 enum MainThreadWorkerState {
+    /// Doing nothing.
     Idle,
+
+    /// Doing codegen, i.e. MIR-to-LLVM-IR conversion.
     Codegenning,
-    LLVMing,
+
+    /// Idle, but lending the compiler process's Token to an LLVM thread so it can do useful work.
+    Lending,
 }
 
 fn start_executing_work<B: ExtraBackendMethods>(
@@ -1296,7 +1301,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut tokens = Vec::new();
 
         let mut main_thread_worker_state = MainThreadWorkerState::Idle;
-        let mut running = 0;
+
+        // How many LLVM worker threads are running while holding a Token. This
+        // *excludes* the LLVM worker thread that the main thread is lending a
+        // Token to (when the main thread is in the `Lending` state).
+        // In other words, the number of LLVM threads is actually equal to
+        // `running + if main_thread_worker_state == Lending { 1 } else { 0 }`.
+        let mut running_with_own_token = 0;
 
         let prof = &cgcx.prof;
         let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
@@ -1307,8 +1318,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // only apply if codegen hasn't been aborted as they represent pending
         // work to be done.
         while codegen_state == Ongoing
-            || running > 0
-            || main_thread_worker_state == MainThreadWorkerState::LLVMing
+            || running_with_own_token > 0
+            || main_thread_worker_state == MainThreadWorkerState::Lending
             || (codegen_state == Completed
                 && !(work_items.is_empty()
                     && needs_fat_lto.is_empty()
@@ -1323,13 +1334,14 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 if main_thread_worker_state == MainThreadWorkerState::Idle {
                     // Compute the number of workers that will be running once we've taken as many
                     // items from the work queue as we can, plus one for the main thread. It's not
-                    // critically important that we use this instead of just `running`, but it
-                    // prevents the `queue_full_enough` heuristic from fluctuating just because a
-                    // worker finished up and we decreased the `running` count, even though we're
-                    // just going to increase it right after this when we put a new worker to work.
-                    let extra_tokens = tokens.len().checked_sub(running).unwrap();
+                    // critically important that we use this instead of just
+                    // `running_with_own_token`, but it prevents the `queue_full_enough` heuristic
+                    // from fluctuating just because a worker finished up and we decreased the
+                    // `running_with_own_token` count, even though we're just going to increase it
+                    // right after this when we put a new worker to work.
+                    let extra_tokens = tokens.len().checked_sub(running_with_own_token).unwrap();
                     let additional_running = std::cmp::min(extra_tokens, work_items.len());
-                    let anticipated_running = running + additional_running + 1;
+                    let anticipated_running = running_with_own_token + additional_running + 1;
 
                     if !queue_full_enough(work_items.len(), anticipated_running) {
                         // The queue is not full enough, process more codegen units:
@@ -1352,7 +1364,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             cgcx.config(item.module_kind()),
                             &mut llvm_start_time,
                         );
-                        main_thread_worker_state = MainThreadWorkerState::LLVMing;
+                        main_thread_worker_state = MainThreadWorkerState::Lending;
                         spawn_work(cgcx, item);
                     }
                 }
@@ -1363,7 +1375,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // going to LTO and then push a bunch of work items onto our
                 // queue to do LTO
                 if work_items.is_empty()
-                    && running == 0
+                    && running_with_own_token == 0
                     && main_thread_worker_state == MainThreadWorkerState::Idle
                 {
                     assert!(!started_lto);
@@ -1401,7 +1413,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                                 cgcx.config(item.module_kind()),
                                 &mut llvm_start_time,
                             );
-                            main_thread_worker_state = MainThreadWorkerState::LLVMing;
+                            main_thread_worker_state = MainThreadWorkerState::Lending;
                             spawn_work(cgcx, item);
                         } else {
                             // There is no unstarted work, so let the main thread
@@ -1410,16 +1422,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             // We reduce the `running` counter by one. The
                             // `tokens.truncate()` below will take care of
                             // giving the Token back.
-                            debug_assert!(running > 0);
-                            running -= 1;
-                            main_thread_worker_state = MainThreadWorkerState::LLVMing;
+                            debug_assert!(running_with_own_token > 0);
+                            running_with_own_token -= 1;
+                            main_thread_worker_state = MainThreadWorkerState::Lending;
                         }
                     }
                     MainThreadWorkerState::Codegenning => bug!(
                         "codegen worker should not be codegenning after \
                               codegen was already completed"
                     ),
-                    MainThreadWorkerState::LLVMing => {
+                    MainThreadWorkerState::Lending => {
                         // Already making good use of that token
                     }
                 }
@@ -1431,7 +1443,10 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
             // Spin up what work we can, only doing this while we've got available
             // parallelism slots and work left to spawn.
-            while codegen_state != Aborted && !work_items.is_empty() && running < tokens.len() {
+            while codegen_state != Aborted
+                && !work_items.is_empty()
+                && running_with_own_token < tokens.len()
+            {
                 let (item, _) = work_items.pop().unwrap();
 
                 maybe_start_llvm_timer(prof, cgcx.config(item.module_kind()), &mut llvm_start_time);
@@ -1440,22 +1455,22 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     CodegenContext { worker: get_worker_id(&mut free_worker_ids), ..cgcx.clone() };
 
                 spawn_work(cgcx, item);
-                running += 1;
+                running_with_own_token += 1;
             }
 
-            // Relinquish accidentally acquired extra tokens
-            tokens.truncate(running);
+            // Relinquish accidentally acquired extra tokens.
+            tokens.truncate(running_with_own_token);
 
             // If a thread exits successfully then we drop a token associated
-            // with that worker and update our `running` count. We may later
-            // re-acquire a token to continue running more work. We may also not
-            // actually drop a token here if the worker was running with an
-            // "ephemeral token"
+            // with that worker and update our `running_with_own_token` count.
+            // We may later re-acquire a token to continue running more work.
+            // We may also not actually drop a token here if the worker was
+            // running with an "ephemeral token".
             let mut free_worker = |worker_id| {
-                if main_thread_worker_state == MainThreadWorkerState::LLVMing {
+                if main_thread_worker_state == MainThreadWorkerState::Lending {
                     main_thread_worker_state = MainThreadWorkerState::Idle;
                 } else {
-                    running -= 1;
+                    running_with_own_token -= 1;
                 }
 
                 free_worker_ids.push(worker_id);
@@ -1471,13 +1486,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         Ok(token) => {
                             tokens.push(token);
 
-                            if main_thread_worker_state == MainThreadWorkerState::LLVMing {
+                            if main_thread_worker_state == MainThreadWorkerState::Lending {
                                 // If the main thread token is used for LLVM work
                                 // at the moment, we turn that thread into a regular
                                 // LLVM worker thread, so the main thread is free
                                 // to react to codegen demand.
                                 main_thread_worker_state = MainThreadWorkerState::Idle;
-                                running += 1;
+                                running_with_own_token += 1;
                             }
                         }
                         Err(e) => {
@@ -1523,7 +1538,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // to exit as soon as possible, but we want to make sure all
                 // existing work has finished. Flag codegen as being done, and
                 // then conditions above will ensure no more work is spawned but
-                // we'll keep executing this loop until `running` hits 0.
+                // we'll keep executing this loop until `running_with_own_token`
+                // hits 0.
                 Message::CodegenAborted => {
                     codegen_state = Aborted;
                 }

From f81fe9d702c936bc32fb3df8e58c02409a7e36dc Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 14:05:15 +1000
Subject: [PATCH 05/20] Rename `MainThreadWorkerState`.

The `Worker` is unnecessary, and just makes it longer than necessary.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 50 ++++++++++----------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 27acd7ad3c299..5ce39e88ca3d7 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -997,7 +997,7 @@ struct Diagnostic {
 }
 
 #[derive(PartialEq, Clone, Copy, Debug)]
-enum MainThreadWorkerState {
+enum MainThreadState {
     /// Doing nothing.
     Idle,
 
@@ -1300,13 +1300,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // the implicit Token the compiler process owns no matter what.
         let mut tokens = Vec::new();
 
-        let mut main_thread_worker_state = MainThreadWorkerState::Idle;
+        let mut main_thread_state = MainThreadState::Idle;
 
         // How many LLVM worker threads are running while holding a Token. This
         // *excludes* the LLVM worker thread that the main thread is lending a
         // Token to (when the main thread is in the `Lending` state).
         // In other words, the number of LLVM threads is actually equal to
-        // `running + if main_thread_worker_state == Lending { 1 } else { 0 }`.
+        // `running + if main_thread_state == Lending { 1 } else { 0 }`.
         let mut running_with_own_token = 0;
 
         let prof = &cgcx.prof;
@@ -1319,19 +1319,19 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // work to be done.
         while codegen_state == Ongoing
             || running_with_own_token > 0
-            || main_thread_worker_state == MainThreadWorkerState::Lending
+            || main_thread_state == MainThreadState::Lending
             || (codegen_state == Completed
                 && !(work_items.is_empty()
                     && needs_fat_lto.is_empty()
                     && needs_thin_lto.is_empty()
                     && lto_import_only_modules.is_empty()
-                    && main_thread_worker_state == MainThreadWorkerState::Idle))
+                    && main_thread_state == MainThreadState::Idle))
         {
             // While there are still CGUs to be codegened, the coordinator has
             // to decide how to utilize the compiler processes implicit Token:
             // For codegenning more CGU or for running them through LLVM.
             if codegen_state == Ongoing {
-                if main_thread_worker_state == MainThreadWorkerState::Idle {
+                if main_thread_state == MainThreadState::Idle {
                     // Compute the number of workers that will be running once we've taken as many
                     // items from the work queue as we can, plus one for the main thread. It's not
                     // critically important that we use this instead of just
@@ -1348,7 +1348,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         if codegen_worker_send.send(CguMessage).is_err() {
                             panic!("Could not send CguMessage to main thread")
                         }
-                        main_thread_worker_state = MainThreadWorkerState::Codegenning;
+                        main_thread_state = MainThreadState::Codegenning;
                     } else {
                         // The queue is full enough to not let the worker
                         // threads starve. Use the implicit Token to do some
@@ -1364,7 +1364,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             cgcx.config(item.module_kind()),
                             &mut llvm_start_time,
                         );
-                        main_thread_worker_state = MainThreadWorkerState::Lending;
+                        main_thread_state = MainThreadState::Lending;
                         spawn_work(cgcx, item);
                     }
                 }
@@ -1376,7 +1376,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // queue to do LTO
                 if work_items.is_empty()
                     && running_with_own_token == 0
-                    && main_thread_worker_state == MainThreadWorkerState::Idle
+                    && main_thread_state == MainThreadState::Idle
                 {
                     assert!(!started_lto);
                     started_lto = true;
@@ -1401,8 +1401,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // In this branch, we know that everything has been codegened,
                 // so it's just a matter of determining whether the implicit
                 // Token is free to use for LLVM work.
-                match main_thread_worker_state {
-                    MainThreadWorkerState::Idle => {
+                match main_thread_state {
+                    MainThreadState::Idle => {
                         if let Some((item, _)) = work_items.pop() {
                             let cgcx = CodegenContext {
                                 worker: get_worker_id(&mut free_worker_ids),
@@ -1413,7 +1413,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                                 cgcx.config(item.module_kind()),
                                 &mut llvm_start_time,
                             );
-                            main_thread_worker_state = MainThreadWorkerState::Lending;
+                            main_thread_state = MainThreadState::Lending;
                             spawn_work(cgcx, item);
                         } else {
                             // There is no unstarted work, so let the main thread
@@ -1424,14 +1424,14 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             // giving the Token back.
                             debug_assert!(running_with_own_token > 0);
                             running_with_own_token -= 1;
-                            main_thread_worker_state = MainThreadWorkerState::Lending;
+                            main_thread_state = MainThreadState::Lending;
                         }
                     }
-                    MainThreadWorkerState::Codegenning => bug!(
+                    MainThreadState::Codegenning => bug!(
                         "codegen worker should not be codegenning after \
                               codegen was already completed"
                     ),
-                    MainThreadWorkerState::Lending => {
+                    MainThreadState::Lending => {
                         // Already making good use of that token
                     }
                 }
@@ -1467,8 +1467,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
             // We may also not actually drop a token here if the worker was
             // running with an "ephemeral token".
             let mut free_worker = |worker_id| {
-                if main_thread_worker_state == MainThreadWorkerState::Lending {
-                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                if main_thread_state == MainThreadState::Lending {
+                    main_thread_state = MainThreadState::Idle;
                 } else {
                     running_with_own_token -= 1;
                 }
@@ -1486,12 +1486,12 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         Ok(token) => {
                             tokens.push(token);
 
-                            if main_thread_worker_state == MainThreadWorkerState::Lending {
+                            if main_thread_state == MainThreadState::Lending {
                                 // If the main thread token is used for LLVM work
                                 // at the moment, we turn that thread into a regular
                                 // LLVM worker thread, so the main thread is free
                                 // to react to codegen demand.
-                                main_thread_worker_state = MainThreadWorkerState::Idle;
+                                main_thread_state = MainThreadState::Idle;
                                 running_with_own_token += 1;
                             }
                         }
@@ -1521,16 +1521,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     if !cgcx.opts.unstable_opts.no_parallel_llvm {
                         helper.request_token();
                     }
-                    assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning);
-                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                    assert_eq!(main_thread_state, MainThreadState::Codegenning);
+                    main_thread_state = MainThreadState::Idle;
                 }
 
                 Message::CodegenComplete => {
                     if codegen_state != Aborted {
                         codegen_state = Completed;
                     }
-                    assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning);
-                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                    assert_eq!(main_thread_state, MainThreadState::Codegenning);
+                    main_thread_state = MainThreadState::Idle;
                 }
 
                 // If codegen is aborted that means translation was aborted due
@@ -1590,9 +1590,9 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 Message::AddImportOnlyModule { module_data, work_product } => {
                     assert!(!started_lto);
                     assert_eq!(codegen_state, Ongoing);
-                    assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning);
+                    assert_eq!(main_thread_state, MainThreadState::Codegenning);
                     lto_import_only_modules.push((module_data, work_product));
-                    main_thread_worker_state = MainThreadWorkerState::Idle;
+                    main_thread_state = MainThreadState::Idle;
                 }
             }
         }

From 8b9e3f0dd6fcfb30162e5a54af9f82fbbcd56a05 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 14:33:27 +1000
Subject: [PATCH 06/20] Remove an unnecessary `pub`.

---
 compiler/rustc_codegen_ssa/src/back/write.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 5ce39e88ca3d7..fcaae23d0b43f 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -2057,7 +2057,7 @@ pub fn submit_pre_lto_module_to_llvm<B: ExtraBackendMethods>(
     })));
 }
 
-pub fn pre_lto_bitcode_filename(module_name: &str) -> String {
+fn pre_lto_bitcode_filename(module_name: &str) -> String {
     format!("{module_name}.{PRE_LTO_BC_EXT}")
 }
 

From 176610c2cd8869fb889d2572b79bc1ee323f6b61 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 20 Jul 2023 14:34:17 +1000
Subject: [PATCH 07/20] Remove some unused values in `codegen_crate`.

---
 compiler/rustc_codegen_ssa/src/base.rs | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 8135b84216247..2a0894f3878d1 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -768,7 +768,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                     module,
                     cost,
                 );
-                false
             }
             CguReuse::PreLto => {
                 submit_pre_lto_module_to_llvm(
@@ -780,7 +779,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                         source: cgu.previous_work_product(tcx),
                     },
                 );
-                true
             }
             CguReuse::PostLto => {
                 submit_post_lto_module_to_llvm(
@@ -791,7 +789,6 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                         source: cgu.previous_work_product(tcx),
                     },
                 );
-                true
             }
         };
     }

From e78fb95dfa40ee9fdf1997009e572fb7ce737080 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 08:37:04 +1000
Subject: [PATCH 08/20] Give the coordinator thread a name.

This is useful when profiling with a profiler like Samply.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index fcaae23d0b43f..246be71f26cc0 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1257,7 +1257,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
     // Each LLVM module is automatically sent back to the coordinator for LTO if
     // necessary. There's already optimizations in place to avoid sending work
     // back to the coordinator if LTO isn't requested.
-    return B::spawn_thread(cgcx.time_trace, move || {
+    return B::spawn_named_thread(cgcx.time_trace, "coordinator".to_string(), move || {
         let mut worker_id_counter = 0;
         let mut free_worker_ids = Vec::new();
         let mut get_worker_id = |free_worker_ids: &mut Vec<usize>| {
@@ -1625,7 +1625,8 @@ fn start_executing_work<B: ExtraBackendMethods>(
             modules: compiled_modules,
             allocator_module: compiled_allocator_module,
         })
-    });
+    })
+    .expect("failed to spawn coordinator thread");
 
     // A heuristic that determines if we have enough LLVM WorkItems in the
     // queue so that the main thread can do LLVM work instead of codegen
@@ -1758,7 +1759,7 @@ fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>
             })
         };
     })
-    .expect("failed to spawn thread");
+    .expect("failed to spawn work thread");
 }
 
 enum SharedEmitterMessage {

From 4a120f33f78961808875460048e6fc4d4e8ba532 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 08:38:28 +1000
Subject: [PATCH 09/20] Remove `ExtraBackendMethods::spawn_thread`.

It's no longer used, and `spawn_named_thread` is preferable, because
naming threads is helpful when profiling.
---
 compiler/rustc_codegen_llvm/src/lib.rs           | 12 ------------
 compiler/rustc_codegen_ssa/src/traits/backend.rs |  9 ---------
 2 files changed, 21 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index bb264db22d9fc..a1b1dcb127847 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -141,18 +141,6 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
         back::write::target_machine_factory(sess, optlvl, target_features)
     }
 
-    fn spawn_thread<F, T>(time_trace: bool, f: F) -> std::thread::JoinHandle<T>
-    where
-        F: FnOnce() -> T,
-        F: Send + 'static,
-        T: Send + 'static,
-    {
-        std::thread::spawn(move || {
-            let _profiler = TimeTraceProfiler::new(time_trace);
-            f()
-        })
-    }
-
     fn spawn_named_thread<F, T>(
         time_trace: bool,
         name: String,
diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs
index 1991b55f19141..0a02ca6b317c2 100644
--- a/compiler/rustc_codegen_ssa/src/traits/backend.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs
@@ -142,15 +142,6 @@ pub trait ExtraBackendMethods:
         target_features: &[String],
     ) -> TargetMachineFactoryFn<Self>;
 
-    fn spawn_thread<F, T>(_time_trace: bool, f: F) -> std::thread::JoinHandle<T>
-    where
-        F: FnOnce() -> T,
-        F: Send + 'static,
-        T: Send + 'static,
-    {
-        std::thread::spawn(f)
-    }
-
     fn spawn_named_thread<F, T>(
         _time_trace: bool,
         name: String,

From 67e4bec200d9e0b208af9bdfd917bc05dec31831 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 08:52:12 +1000
Subject: [PATCH 10/20] Fix a comment.

Make it match the corresponding comment at the start of the unstable
options.
---
 compiler/rustc_session/src/options.rs | 1 +
 1 file changed, 1 insertion(+)

diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs
index 8eab0fb33a7af..055ab2d9c1583 100644
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@@ -1881,6 +1881,7 @@ written to standard error output)"),
 
     // If you add a new option, please update:
     // - compiler/rustc_interface/src/tests.rs
+    // - src/doc/unstable-book/src/compiler-flags
 }
 
 #[derive(Clone, Hash, PartialEq, Eq, Debug)]

From 0ea9950ede4dd5257776b126361c93f28b556eec Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 08:58:08 +1000
Subject: [PATCH 11/20] Document `-Zno-parallel-llvm`.

Because it's usefulness wasn't clear to me, and I initially wondered if
it could be removed. The text is based on the text in #50972, the PR
that added the flag.
---
 .../unstable-book/src/compiler-flags/no-parallel-llvm.md  | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 src/doc/unstable-book/src/compiler-flags/no-parallel-llvm.md

diff --git a/src/doc/unstable-book/src/compiler-flags/no-parallel-llvm.md b/src/doc/unstable-book/src/compiler-flags/no-parallel-llvm.md
new file mode 100644
index 0000000000000..f19ba16b6f70d
--- /dev/null
+++ b/src/doc/unstable-book/src/compiler-flags/no-parallel-llvm.md
@@ -0,0 +1,8 @@
+# `no-parallel-llvm`
+
+---------------------
+
+This flag disables parallelization of codegen and linking, while otherwise preserving
+behavior with regard to codegen units and LTO.
+
+This flag is not useful for regular users, but it can be useful for debugging the backend. Codegen issues commonly only manifest under specific circumstances, e.g. if multiple codegen units are used and ThinLTO is enabled. Serialization of these threaded configurations makes the use of LLVM debugging facilities easier, by avoiding the interleaving of output.

From 3517fe899e200a2904cd0a931a7afa4b9f6fd35b Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 09:32:57 +1000
Subject: [PATCH 12/20] Remove `CodegenContext::worker`.

`CodegenContext` is immutable except for the `worker` field - we clone
`CodegenContext` in multiple places, changing the `worker` field each
time. It's simpler to move the `worker` field out of `CodegenContext`.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 36 +++++++-------------
 1 file changed, 12 insertions(+), 24 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 246be71f26cc0..ef10b5c21d53c 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -349,8 +349,6 @@ pub struct CodegenContext<B: WriteBackendMethods> {
     /// Directory into which should the LLVM optimization remarks be written.
     /// If `None`, they will be written to stderr.
     pub remark_dir: Option<PathBuf>,
-    /// Worker thread number
-    pub worker: usize,
     /// The incremental compilation session directory, or None if we are not
     /// compiling incrementally
     pub incr_comp_session_dir: Option<PathBuf>,
@@ -1104,7 +1102,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         exported_symbols,
         remark: sess.opts.cg.remark.clone(),
         remark_dir,
-        worker: 0,
         incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
         cgu_reuse_tracker: sess.cgu_reuse_tracker.clone(),
         coordinator_send,
@@ -1355,17 +1352,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         // LLVM work too.
                         let (item, _) =
                             work_items.pop().expect("queue empty - queue_full_enough() broken?");
-                        let cgcx = CodegenContext {
-                            worker: get_worker_id(&mut free_worker_ids),
-                            ..cgcx.clone()
-                        };
                         maybe_start_llvm_timer(
                             prof,
                             cgcx.config(item.module_kind()),
                             &mut llvm_start_time,
                         );
                         main_thread_state = MainThreadState::Lending;
-                        spawn_work(cgcx, item);
+                        spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
                     }
                 }
             } else if codegen_state == Completed {
@@ -1404,17 +1397,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 match main_thread_state {
                     MainThreadState::Idle => {
                         if let Some((item, _)) = work_items.pop() {
-                            let cgcx = CodegenContext {
-                                worker: get_worker_id(&mut free_worker_ids),
-                                ..cgcx.clone()
-                            };
                             maybe_start_llvm_timer(
                                 prof,
                                 cgcx.config(item.module_kind()),
                                 &mut llvm_start_time,
                             );
                             main_thread_state = MainThreadState::Lending;
-                            spawn_work(cgcx, item);
+                            spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
                         } else {
                             // There is no unstarted work, so let the main thread
                             // take over for a running worker. Otherwise the
@@ -1450,11 +1439,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 let (item, _) = work_items.pop().unwrap();
 
                 maybe_start_llvm_timer(prof, cgcx.config(item.module_kind()), &mut llvm_start_time);
-
-                let cgcx =
-                    CodegenContext { worker: get_worker_id(&mut free_worker_ids), ..cgcx.clone() };
-
-                spawn_work(cgcx, item);
+                spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
                 running_with_own_token += 1;
             }
 
@@ -1700,7 +1685,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
 #[must_use]
 pub struct WorkerFatalError;
 
-fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>) {
+fn spawn_work<B: ExtraBackendMethods>(
+    cgcx: &CodegenContext<B>,
+    worker_id: usize,
+    work: WorkItem<B>,
+) {
+    let cgcx = cgcx.clone();
+
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {
         // Set up a destructor which will fire off a message that we're done as
         // we exit.
@@ -1723,11 +1714,8 @@ fn spawn_work<B: ExtraBackendMethods>(cgcx: CodegenContext<B>, work: WorkItem<B>
             }
         }
 
-        let mut bomb = Bomb::<B> {
-            coordinator_send: cgcx.coordinator_send.clone(),
-            result: None,
-            worker_id: cgcx.worker,
-        };
+        let mut bomb =
+            Bomb::<B> { coordinator_send: cgcx.coordinator_send.clone(), result: None, worker_id };
 
         // Execute the work itself, and if it finishes successfully then flag
         // ourselves as a success as well.

From d21d31cce7675f9b454022827a304919ceb5e9f0 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 09:22:32 +1000
Subject: [PATCH 13/20] Move `maybe_start_llvm_timer`'s body into `spawn_work`.

The two functions are alway called together. This commit factors out the
repeated code.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 44 ++++++++------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index ef10b5c21d53c..d42d5fe0be278 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1306,7 +1306,6 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // `running + if main_thread_state == Lending { 1 } else { 0 }`.
         let mut running_with_own_token = 0;
 
-        let prof = &cgcx.prof;
         let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
 
         // Run the message loop while there's still anything that needs message
@@ -1352,13 +1351,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                         // LLVM work too.
                         let (item, _) =
                             work_items.pop().expect("queue empty - queue_full_enough() broken?");
-                        maybe_start_llvm_timer(
-                            prof,
-                            cgcx.config(item.module_kind()),
+                        main_thread_state = MainThreadState::Lending;
+                        spawn_work(
+                            &cgcx,
                             &mut llvm_start_time,
+                            get_worker_id(&mut free_worker_ids),
+                            item,
                         );
-                        main_thread_state = MainThreadState::Lending;
-                        spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
                     }
                 }
             } else if codegen_state == Completed {
@@ -1397,13 +1396,13 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 match main_thread_state {
                     MainThreadState::Idle => {
                         if let Some((item, _)) = work_items.pop() {
-                            maybe_start_llvm_timer(
-                                prof,
-                                cgcx.config(item.module_kind()),
+                            main_thread_state = MainThreadState::Lending;
+                            spawn_work(
+                                &cgcx,
                                 &mut llvm_start_time,
+                                get_worker_id(&mut free_worker_ids),
+                                item,
                             );
-                            main_thread_state = MainThreadState::Lending;
-                            spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
                         } else {
                             // There is no unstarted work, so let the main thread
                             // take over for a running worker. Otherwise the
@@ -1437,9 +1436,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 && running_with_own_token < tokens.len()
             {
                 let (item, _) = work_items.pop().unwrap();
-
-                maybe_start_llvm_timer(prof, cgcx.config(item.module_kind()), &mut llvm_start_time);
-                spawn_work(&cgcx, get_worker_id(&mut free_worker_ids), item);
+                spawn_work(&cgcx, &mut llvm_start_time, get_worker_id(&mut free_worker_ids), item);
                 running_with_own_token += 1;
             }
 
@@ -1669,27 +1666,22 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let quarter_of_workers = workers_running - 3 * workers_running / 4;
         items_in_queue > 0 && items_in_queue >= quarter_of_workers
     }
-
-    fn maybe_start_llvm_timer<'a>(
-        prof: &'a SelfProfilerRef,
-        config: &ModuleConfig,
-        llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
-    ) {
-        if config.time_module && llvm_start_time.is_none() {
-            *llvm_start_time = Some(prof.verbose_generic_activity("LLVM_passes"));
-        }
-    }
 }
 
 /// `FatalError` is explicitly not `Send`.
 #[must_use]
 pub struct WorkerFatalError;
 
-fn spawn_work<B: ExtraBackendMethods>(
-    cgcx: &CodegenContext<B>,
+fn spawn_work<'a, B: ExtraBackendMethods>(
+    cgcx: &'a CodegenContext<B>,
+    llvm_start_time: &mut Option<VerboseTimingGuard<'a>>,
     worker_id: usize,
     work: WorkItem<B>,
 ) {
+    if cgcx.config(work.module_kind()).time_module && llvm_start_time.is_none() {
+        *llvm_start_time = Some(cgcx.prof.verbose_generic_activity("LLVM_passes"));
+    }
+
     let cgcx = cgcx.clone();
 
     B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || {

From 179bf19813af8b312da2529d3577a98be125c135 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 09:48:57 +1000
Subject: [PATCH 14/20] Tweak a loop condition.

This loop condition involves `codegen_state`, `work_items`, and
`running_with_own_token`. But the body of the loop cannot modify
`codegen_state`, so repeatedly checking it is unnecessary.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index d42d5fe0be278..a0e32752a992d 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1431,13 +1431,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
 
             // Spin up what work we can, only doing this while we've got available
             // parallelism slots and work left to spawn.
-            while codegen_state != Aborted
-                && !work_items.is_empty()
-                && running_with_own_token < tokens.len()
-            {
-                let (item, _) = work_items.pop().unwrap();
-                spawn_work(&cgcx, &mut llvm_start_time, get_worker_id(&mut free_worker_ids), item);
-                running_with_own_token += 1;
+            if codegen_state != Aborted {
+                while !work_items.is_empty() && running_with_own_token < tokens.len() {
+                    let (item, _) = work_items.pop().unwrap();
+                    spawn_work(
+                        &cgcx,
+                        &mut llvm_start_time,
+                        get_worker_id(&mut free_worker_ids),
+                        item,
+                    );
+                    running_with_own_token += 1;
+                }
             }
 
             // Relinquish accidentally acquired extra tokens.

From a08220bcabe6f5ae3b01596311948a42dafbe4ec Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 10:02:52 +1000
Subject: [PATCH 15/20] Tweak structure of the message loop.

The main loop has a *very* complex condition, which includes two
mentions of `codegen_state`. The body of the loop then immediately
switches on the `codegen_state`.

I find it easier to understand if it's a `loop` and we check for exit
conditions after switching on `codegen_state`. We end up with a tiny bit
of code duplication, but it's clear that (a) we never exit in the
`Ongoing` case, (b) we exit in the `Completed` state only if several
things are true (and there's interaction with LTO there), and (c) we
exit in the `Aborted` state if a couple of things are true. Also, the
exit conditions are all simple conjunctions.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 37 +++++++++++---------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index a0e32752a992d..3fde50d46e32e 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1313,16 +1313,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
         // wait for all existing work to finish, so many of the conditions here
         // only apply if codegen hasn't been aborted as they represent pending
         // work to be done.
-        while codegen_state == Ongoing
-            || running_with_own_token > 0
-            || main_thread_state == MainThreadState::Lending
-            || (codegen_state == Completed
-                && !(work_items.is_empty()
-                    && needs_fat_lto.is_empty()
-                    && needs_thin_lto.is_empty()
-                    && lto_import_only_modules.is_empty()
-                    && main_thread_state == MainThreadState::Idle))
-        {
+        loop {
             // While there are still CGUs to be codegened, the coordinator has
             // to decide how to utilize the compiler processes implicit Token:
             // For codegenning more CGU or for running them through LLVM.
@@ -1361,15 +1352,24 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     }
                 }
             } else if codegen_state == Completed {
-                // If we've finished everything related to normal codegen
-                // then it must be the case that we've got some LTO work to do.
-                // Perform the serial work here of figuring out what we're
-                // going to LTO and then push a bunch of work items onto our
-                // queue to do LTO
-                if work_items.is_empty()
-                    && running_with_own_token == 0
+                if running_with_own_token == 0
                     && main_thread_state == MainThreadState::Idle
+                    && work_items.is_empty()
                 {
+                    // All codegen work is done. Do we have LTO work to do?
+                    if needs_fat_lto.is_empty()
+                        && needs_thin_lto.is_empty()
+                        && lto_import_only_modules.is_empty()
+                    {
+                        // Nothing more to do!
+                        break;
+                    }
+
+                    // We have LTO work to do. Perform the serial work here of
+                    // figuring out what we're going to LTO and then push a
+                    // bunch of work items onto our queue to do LTO. This all
+                    // happens on the coordinator thread but it's very quick so
+                    // we don't worry about tokens.
                     assert!(!started_lto);
                     started_lto = true;
 
@@ -1427,6 +1427,9 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // Don't queue up any more work if codegen was aborted, we're
                 // just waiting for our existing children to finish.
                 assert!(codegen_state == Aborted);
+                if running_with_own_token == 0 && main_thread_state != MainThreadState::Lending {
+                    break;
+                }
             }
 
             // Spin up what work we can, only doing this while we've got available

From 3b44f5b0ebf7bdacde8b6e3e50582b5ea329da15 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 11:18:25 +1000
Subject: [PATCH 16/20] Use standard Rust capitalization rules for names
 containing "LTO".

---
 compiler/rustc_codegen_gcc/src/lib.rs          |  8 ++++----
 compiler/rustc_codegen_llvm/src/back/lto.rs    | 10 +++++-----
 compiler/rustc_codegen_llvm/src/lib.rs         |  4 ++--
 compiler/rustc_codegen_ssa/src/back/write.rs   | 18 +++++++++---------
 compiler/rustc_codegen_ssa/src/traits/write.rs |  4 ++--
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs
index 04ac0254a81ba..697ae015fed9a 100644
--- a/compiler/rustc_codegen_gcc/src/lib.rs
+++ b/compiler/rustc_codegen_gcc/src/lib.rs
@@ -71,7 +71,7 @@ use gccjit::{Context, OptimizationLevel, CType};
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen};
 use rustc_codegen_ssa::base::codegen_crate;
-use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryFn};
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn};
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::target_features::supported_target_features;
 use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, ModuleBufferMethods, ThinBufferMethods, WriteBackendMethods};
@@ -217,14 +217,14 @@ impl WriteBackendMethods for GccCodegenBackend {
     type ThinData = ();
     type ThinBuffer = ThinBuffer;
 
-    fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLTOInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
+    fn run_fat_lto(_cgcx: &CodegenContext<Self>, mut modules: Vec<FatLtoInput<Self>>, _cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>) -> Result<LtoModuleCodegen<Self>, FatalError> {
         // TODO(antoyo): implement LTO by sending -flto to libgccjit and adding the appropriate gcc linker plugins.
         // NOTE: implemented elsewhere.
         // TODO(antoyo): what is implemented elsewhere ^ ?
         let module =
             match modules.remove(0) {
-                FatLTOInput::InMemory(module) => module,
-                FatLTOInput::Serialized { .. } => {
+                FatLtoInput::InMemory(module) => module,
+                FatLtoInput::Serialized { .. } => {
                     unimplemented!();
                 }
             };
diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs
index bf0ed8a4de0bc..b2d28cef89976 100644
--- a/compiler/rustc_codegen_llvm/src/back/lto.rs
+++ b/compiler/rustc_codegen_llvm/src/back/lto.rs
@@ -7,7 +7,7 @@ use crate::{LlvmCodegenBackend, ModuleLlvm};
 use object::read::archive::ArchiveFile;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 use rustc_codegen_ssa::back::symbol_export;
-use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, TargetMachineFactoryConfig};
+use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput, TargetMachineFactoryConfig};
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
 use rustc_data_structures::fx::FxHashMap;
@@ -166,7 +166,7 @@ fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], LtoBitcodeFro
 /// for further optimization.
 pub(crate) fn run_fat(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
-    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
+    modules: Vec<FatLtoInput<LlvmCodegenBackend>>,
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 ) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
     let diag_handler = cgcx.create_diag_handler();
@@ -220,7 +220,7 @@ pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBu
 fn fat_lto(
     cgcx: &CodegenContext<LlvmCodegenBackend>,
     diag_handler: &Handler,
-    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
+    modules: Vec<FatLtoInput<LlvmCodegenBackend>>,
     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
     mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
     symbols_below_threshold: &[*const libc::c_char],
@@ -245,8 +245,8 @@ fn fat_lto(
     }));
     for module in modules {
         match module {
-            FatLTOInput::InMemory(m) => in_memory.push(m),
-            FatLTOInput::Serialized { name, buffer } => {
+            FatLtoInput::InMemory(m) => in_memory.push(m),
+            FatLtoInput::Serialized { name, buffer } => {
                 info!("pushing serialized module {:?}", name);
                 let buffer = SerializedModule::Local(buffer);
                 serialized_modules.push((buffer, CString::new(name).unwrap()));
diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs
index a1b1dcb127847..d283299ac46b4 100644
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@@ -28,7 +28,7 @@ pub use llvm_util::target_features;
 use rustc_ast::expand::allocator::AllocatorKind;
 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
 use rustc_codegen_ssa::back::write::{
-    CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
+    CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn,
 };
 use rustc_codegen_ssa::traits::*;
 use rustc_codegen_ssa::ModuleCodegen;
@@ -200,7 +200,7 @@ impl WriteBackendMethods for LlvmCodegenBackend {
     }
     fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
-        modules: Vec<FatLTOInput<Self>>,
+        modules: Vec<FatLtoInput<Self>>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<LtoModuleCodegen<Self>, FatalError> {
         back::lto::run_fat(cgcx, modules, cached_modules)
diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 3fde50d46e32e..67916ba12a2f5 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -374,7 +374,7 @@ impl<B: WriteBackendMethods> CodegenContext<B> {
 
 fn generate_lto_work<B: ExtraBackendMethods>(
     cgcx: &CodegenContext<B>,
-    needs_fat_lto: Vec<FatLTOInput<B>>,
+    needs_fat_lto: Vec<FatLtoInput<B>>,
     needs_thin_lto: Vec<(String, B::ThinBuffer)>,
     import_only_modules: Vec<(SerializedModule<B::ModuleBuffer>, WorkProduct)>,
 ) -> Vec<(WorkItem<B>, u64)> {
@@ -758,14 +758,14 @@ pub(crate) enum WorkItemResult<B: WriteBackendMethods> {
 
     /// The backend has finished compiling a CGU, which now needs to go through
     /// fat LTO.
-    NeedsFatLTO(FatLTOInput<B>),
+    NeedsFatLto(FatLtoInput<B>),
 
     /// The backend has finished compiling a CGU, which now needs to go through
     /// thin LTO.
-    NeedsThinLTO(String, B::ThinBuffer),
+    NeedsThinLto(String, B::ThinBuffer),
 }
 
-pub enum FatLTOInput<B: WriteBackendMethods> {
+pub enum FatLtoInput<B: WriteBackendMethods> {
     Serialized { name: String, buffer: B::ModuleBuffer },
     InMemory(ModuleCodegen<B::Module>),
 }
@@ -854,7 +854,7 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
                     panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
                 });
             }
-            Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))
+            Ok(WorkItemResult::NeedsThinLto(name, thin_buffer))
         }
         ComputedLtoType::Fat => match bitcode {
             Some(path) => {
@@ -862,9 +862,9 @@ fn execute_optimize_work_item<B: ExtraBackendMethods>(
                 fs::write(&path, buffer.data()).unwrap_or_else(|e| {
                     panic!("Error writing pre-lto-bitcode file `{}`: {}", path.display(), e);
                 });
-                Ok(WorkItemResult::NeedsFatLTO(FatLTOInput::Serialized { name, buffer }))
+                Ok(WorkItemResult::NeedsFatLto(FatLtoInput::Serialized { name, buffer }))
             }
-            None => Ok(WorkItemResult::NeedsFatLTO(FatLTOInput::InMemory(module))),
+            None => Ok(WorkItemResult::NeedsFatLto(FatLtoInput::InMemory(module))),
         },
     }
 }
@@ -1554,12 +1554,12 @@ fn start_executing_work<B: ExtraBackendMethods>(
                             assert!(compiled_modules.is_empty());
                             needs_link.push(module);
                         }
-                        Ok(WorkItemResult::NeedsFatLTO(fat_lto_input)) => {
+                        Ok(WorkItemResult::NeedsFatLto(fat_lto_input)) => {
                             assert!(!started_lto);
                             assert!(needs_thin_lto.is_empty());
                             needs_fat_lto.push(fat_lto_input);
                         }
-                        Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer)) => {
+                        Ok(WorkItemResult::NeedsThinLto(name, thin_buffer)) => {
                             assert!(!started_lto);
                             assert!(needs_fat_lto.is_empty());
                             needs_thin_lto.push((name, thin_buffer));
diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs
index a3b11ed4fe8f6..ecf5095d8a335 100644
--- a/compiler/rustc_codegen_ssa/src/traits/write.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/write.rs
@@ -1,5 +1,5 @@
 use crate::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule};
-use crate::back::write::{CodegenContext, FatLTOInput, ModuleConfig};
+use crate::back::write::{CodegenContext, FatLtoInput, ModuleConfig};
 use crate::{CompiledModule, ModuleCodegen};
 
 use rustc_errors::{FatalError, Handler};
@@ -23,7 +23,7 @@ pub trait WriteBackendMethods: 'static + Sized + Clone {
     /// for further optimization.
     fn run_fat_lto(
         cgcx: &CodegenContext<Self>,
-        modules: Vec<FatLTOInput<Self>>,
+        modules: Vec<FatLtoInput<Self>>,
         cached_modules: Vec<(SerializedModule<Self::ModuleBuffer>, WorkProduct)>,
     ) -> Result<LtoModuleCodegen<Self>, FatalError>;
     /// Performs thin LTO by performing necessary global analysis and returning two

From 90ce358afa1ac796e320f9c60463b604f30c7eeb Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Fri, 21 Jul 2023 13:32:29 +1000
Subject: [PATCH 17/20] Introduce `running_with_any_token` closure.

It makes things a little clearer.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 67916ba12a2f5..1f6d73994cf86 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -1300,12 +1300,16 @@ fn start_executing_work<B: ExtraBackendMethods>(
         let mut main_thread_state = MainThreadState::Idle;
 
         // How many LLVM worker threads are running while holding a Token. This
-        // *excludes* the LLVM worker thread that the main thread is lending a
-        // Token to (when the main thread is in the `Lending` state).
-        // In other words, the number of LLVM threads is actually equal to
-        // `running + if main_thread_state == Lending { 1 } else { 0 }`.
+        // *excludes* any that the main thread is lending a Token to.
         let mut running_with_own_token = 0;
 
+        // How many LLVM worker threads are running in total. This *includes*
+        // any that the main thread is lending a Token to.
+        let running_with_any_token = |main_thread_state, running_with_own_token| {
+            running_with_own_token
+                + if main_thread_state == MainThreadState::Lending { 1 } else { 0 }
+        };
+
         let mut llvm_start_time: Option<VerboseTimingGuard<'_>> = None;
 
         // Run the message loop while there's still anything that needs message
@@ -1352,8 +1356,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                     }
                 }
             } else if codegen_state == Completed {
-                if running_with_own_token == 0
-                    && main_thread_state == MainThreadState::Idle
+                if running_with_any_token(main_thread_state, running_with_own_token) == 0
                     && work_items.is_empty()
                 {
                     // All codegen work is done. Do we have LTO work to do?
@@ -1427,7 +1430,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
                 // Don't queue up any more work if codegen was aborted, we're
                 // just waiting for our existing children to finish.
                 assert!(codegen_state == Aborted);
-                if running_with_own_token == 0 && main_thread_state != MainThreadState::Lending {
+                if running_with_any_token(main_thread_state, running_with_own_token) == 0 {
                     break;
                 }
             }

From d404699fb170d146114d9be2cb2f5935aeee4047 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Mon, 24 Jul 2023 08:24:51 +1000
Subject: [PATCH 18/20] Fix LLVM thread names on Windows.

PR #112946 tweaked the naming of LLVM threads, but messed things up
slightly, resulting in threads on Windows having names like `optimize
module {} regex.f10ba03eb5ec7975-cgu.0`.

This commit removes the extraneous `{} `.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 1f6d73994cf86..5eff62df7d65e 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -740,9 +740,9 @@ impl<B: WriteBackendMethods> WorkItem<B> {
         }
 
         match self {
-            WorkItem::Optimize(m) => desc("opt", "optimize module {}", &m.name),
-            WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for {}", &m.name),
-            WorkItem::LTO(m) => desc("lto", "LTO module {}", m.name()),
+            WorkItem::Optimize(m) => desc("opt", "optimize module", &m.name),
+            WorkItem::CopyPostLtoArtifacts(m) => desc("cpy", "copy LTO artifacts for", &m.name),
+            WorkItem::LTO(m) => desc("lto", "LTO module", m.name()),
         }
     }
 }

From 5673f4704298fa5ba3e0f7db55a2775bbea7e7a7 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Mon, 24 Jul 2023 17:43:53 +1000
Subject: [PATCH 19/20] Clean up `generate_lto_work`.

This function has some shared code for the thin LTO and fat LTO cases,
but those cases have so little in common that it's actually clearer to
treat them fully separately.
---
 compiler/rustc_codegen_ssa/src/back/write.rs | 45 ++++++++++----------
 1 file changed, 23 insertions(+), 22 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs
index 5eff62df7d65e..4ef337bc67706 100644
--- a/compiler/rustc_codegen_ssa/src/back/write.rs
+++ b/compiler/rustc_codegen_ssa/src/back/write.rs
@@ -380,32 +380,33 @@ fn generate_lto_work<B: ExtraBackendMethods>(
 ) -> Vec<(WorkItem<B>, u64)> {
     let _prof_timer = cgcx.prof.generic_activity("codegen_generate_lto_work");
 
-    let (lto_modules, copy_jobs) = if !needs_fat_lto.is_empty() {
+    if !needs_fat_lto.is_empty() {
         assert!(needs_thin_lto.is_empty());
-        let lto_module =
+        let module =
             B::run_fat_lto(cgcx, needs_fat_lto, import_only_modules).unwrap_or_else(|e| e.raise());
-        (vec![lto_module], vec![])
+        // We are adding a single work item, so the cost doesn't matter.
+        vec![(WorkItem::LTO(module), 0)]
     } else {
         assert!(needs_fat_lto.is_empty());
-        B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules).unwrap_or_else(|e| e.raise())
-    };
-
-    lto_modules
-        .into_iter()
-        .map(|module| {
-            let cost = module.cost();
-            (WorkItem::LTO(module), cost)
-        })
-        .chain(copy_jobs.into_iter().map(|wp| {
-            (
-                WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
-                    name: wp.cgu_name.clone(),
-                    source: wp,
-                }),
-                0,
-            )
-        }))
-        .collect()
+        let (lto_modules, copy_jobs) = B::run_thin_lto(cgcx, needs_thin_lto, import_only_modules)
+            .unwrap_or_else(|e| e.raise());
+        lto_modules
+            .into_iter()
+            .map(|module| {
+                let cost = module.cost();
+                (WorkItem::LTO(module), cost)
+            })
+            .chain(copy_jobs.into_iter().map(|wp| {
+                (
+                    WorkItem::CopyPostLtoArtifacts(CachedModuleCodegen {
+                        name: wp.cgu_name.clone(),
+                        source: wp,
+                    }),
+                    0, // copying is very cheap
+                )
+            }))
+            .collect()
+    }
 }
 
 pub struct CompiledModules {

From c17c8dc78e475e2df377b3cd5c56e6233a1a5157 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Mon, 31 Jul 2023 16:34:13 +1000
Subject: [PATCH 20/20] Remove unnecessary semicolon.

---
 compiler/rustc_codegen_ssa/src/base.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs
index 2a0894f3878d1..0ccb08c78f75f 100644
--- a/compiler/rustc_codegen_ssa/src/base.rs
+++ b/compiler/rustc_codegen_ssa/src/base.rs
@@ -790,7 +790,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
                     },
                 );
             }
-        };
+        }
     }
 
     ongoing_codegen.codegen_finished(tcx);