From bec7babec9c924a0ee7ad27e3f6582bc5bd1fef5 Mon Sep 17 00:00:00 2001
From: "Wei (Will) Feng" <134637289+weifengpy@users.noreply.github.com>
Date: Tue, 23 Apr 2024 08:25:56 -0700
Subject: [PATCH] [FSDP1] reduce GPU memory usage from 78G to 23G (#843)

---
 recipes/lora_finetune_distributed.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/recipes/lora_finetune_distributed.py b/recipes/lora_finetune_distributed.py
index ee5475a201..b1c50ba193 100644
--- a/recipes/lora_finetune_distributed.py
+++ b/recipes/lora_finetune_distributed.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+import os
 import sys
 import time

@@ -600,7 +601,7 @@ def recipe_main(cfg: DictConfig) -> None:
             "Distributed finetune recipe should be run via a distributed launcher."
             "If using tune CLI, please specify --nnodes 1 and --nproc_per_node [num_gpus]"
         )
-
+    os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"
     init_process_group(backend="gloo" if cfg.device == "cpu" else "nccl")

     config.log_config(recipe_name="LoRAFinetuneRecipeDistributed", cfg=cfg)
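
Note (not part of the patch): TORCH_NCCL_AVOID_RECORD_STREAMS=1 tells PyTorch's NCCL process group to hold references to collective inputs/outputs itself rather than calling recordStream on them; recordStream makes the CUDA caching allocator defer reusing those blocks, which can inflate peak GPU memory under FSDP. The flag is read when the NCCL process group is created, which is why the patch sets it immediately before init_process_group. Below is a minimal standalone sketch of the same idea, assuming a torchrun launch; the main() scaffolding is illustrative and not taken from the recipe.

# Minimal sketch: set TORCH_NCCL_AVOID_RECORD_STREAMS before creating the
# NCCL process group, mirroring the placement used in the patch above.
import os

import torch
import torch.distributed as dist


def main() -> None:
    # Must be set before the NCCL process group is initialized to take effect.
    os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1"

    # torchrun provides RANK, WORLD_SIZE, MASTER_ADDR, and MASTER_PORT.
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(dist.get_rank() % torch.cuda.device_count())

    # ... build the FSDP-wrapped model and run training here ...

    dist.destroy_process_group()


if __name__ == "__main__":
    main()

Launched, for example, with: torchrun --nproc_per_node=8 train.py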