Skip to content

Commit

Permalink
Using FormattedCheckpointFiles in configs (#2147)
Browse files Browse the repository at this point in the history
  • Loading branch information
SalmanMohammadi authored Dec 13, 2024
1 parent df50637 commit cdaece1
Show file tree
Hide file tree
Showing 12 changed files with 36 additions and 326 deletions.
20 changes: 3 additions & 17 deletions recipes/configs/llama2/70B_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,23 +29,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Llama-2-70b-hf
checkpoint_files: [
pytorch_model-00001-of-00015.bin,
pytorch_model-00002-of-00015.bin,
pytorch_model-00003-of-00015.bin,
pytorch_model-00004-of-00015.bin,
pytorch_model-00005-of-00015.bin,
pytorch_model-00006-of-00015.bin,
pytorch_model-00007-of-00015.bin,
pytorch_model-00008-of-00015.bin,
pytorch_model-00009-of-00015.bin,
pytorch_model-00010-of-00015.bin,
pytorch_model-00011-of-00015.bin,
pytorch_model-00012-of-00015.bin,
pytorch_model-00013-of-00015.bin,
pytorch_model-00014-of-00015.bin,
pytorch_model-00015-of-00015.bin,
]
checkpoint_files:
filename_format: pytorch_model-{}-of-{}.bin
max_filename: "00015"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA2
Expand Down
20 changes: 3 additions & 17 deletions recipes/configs/llama2/70B_qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,23 +34,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Llama-2-70b-hf
checkpoint_files: [
pytorch_model-00001-of-00015.bin,
pytorch_model-00002-of-00015.bin,
pytorch_model-00003-of-00015.bin,
pytorch_model-00004-of-00015.bin,
pytorch_model-00005-of-00015.bin,
pytorch_model-00006-of-00015.bin,
pytorch_model-00007-of-00015.bin,
pytorch_model-00008-of-00015.bin,
pytorch_model-00009-of-00015.bin,
pytorch_model-00010-of-00015.bin,
pytorch_model-00011-of-00015.bin,
pytorch_model-00012-of-00015.bin,
pytorch_model-00013-of-00015.bin,
pytorch_model-00014-of-00015.bin,
pytorch_model-00015-of-00015.bin,
]
checkpoint_files:
filename_format: pytorch_model-{}-of-{}.bin
max_filename: "00015"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA2
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3/70B_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,38 +39,9 @@ model:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-70B-Instruct
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3/70B_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3-70B-Instruct
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3_1/70B_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,38 +38,9 @@ model:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3_1/70B_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Meta-Llama-3.1-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3_3/70B_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,38 +38,9 @@ model:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3_3/70B_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
35 changes: 3 additions & 32 deletions recipes/configs/llama3_3/70B_qlora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,38 +28,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Llama-3.3-70B-Instruct/
checkpoint_files: [
model-00001-of-00030.safetensors,
model-00002-of-00030.safetensors,
model-00003-of-00030.safetensors,
model-00004-of-00030.safetensors,
model-00005-of-00030.safetensors,
model-00006-of-00030.safetensors,
model-00007-of-00030.safetensors,
model-00008-of-00030.safetensors,
model-00009-of-00030.safetensors,
model-00010-of-00030.safetensors,
model-00011-of-00030.safetensors,
model-00012-of-00030.safetensors,
model-00013-of-00030.safetensors,
model-00014-of-00030.safetensors,
model-00015-of-00030.safetensors,
model-00016-of-00030.safetensors,
model-00017-of-00030.safetensors,
model-00018-of-00030.safetensors,
model-00019-of-00030.safetensors,
model-00020-of-00030.safetensors,
model-00021-of-00030.safetensors,
model-00022-of-00030.safetensors,
model-00023-of-00030.safetensors,
model-00024-of-00030.safetensors,
model-00025-of-00030.safetensors,
model-00026-of-00030.safetensors,
model-00027-of-00030.safetensors,
model-00028-of-00030.safetensors,
model-00029-of-00030.safetensors,
model-00030-of-00030.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00030"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: LLAMA3
Expand Down
13 changes: 3 additions & 10 deletions recipes/configs/qwen2_5/14B_lora_single_device.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Qwen2_5-14B-Instruct
checkpoint_files: [
model-00001-of-00008.safetensors,
model-00002-of-00008.safetensors,
model-00003-of-00008.safetensors,
model-00004-of-00008.safetensors,
model-00005-of-00008.safetensors,
model-00006-of-00008.safetensors,
model-00007-of-00008.safetensors,
model-00008-of-00008.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00008"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: QWEN2
Expand Down
22 changes: 3 additions & 19 deletions recipes/configs/qwen2_5/32B_lora.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,25 +35,9 @@ tokenizer:
checkpointer:
_component_: torchtune.training.FullModelHFCheckpointer
checkpoint_dir: /tmp/Qwen2_5-32B-Instruct
checkpoint_files: [
model-00001-of-00017.safetensors,
model-00002-of-00017.safetensors,
model-00003-of-00017.safetensors,
model-00004-of-00017.safetensors,
model-00005-of-00017.safetensors,
model-00006-of-00017.safetensors,
model-00007-of-00017.safetensors,
model-00008-of-00017.safetensors,
model-00009-of-00017.safetensors,
model-00010-of-00017.safetensors,
model-00011-of-00017.safetensors,
model-00012-of-00017.safetensors,
model-00013-of-00017.safetensors,
model-00014-of-00017.safetensors,
model-00015-of-00017.safetensors,
model-00016-of-00017.safetensors,
model-00017-of-00017.safetensors,
]
checkpoint_files:
filename_format: model-{}-of-{}.safetensors
max_filename: "00017"
recipe_checkpoint: null
output_dir: ${output_dir}
model_type: QWEN2
Expand Down
Loading

0 comments on commit cdaece1

Please sign in to comment.