Starter Task 1: Get learning rate for llm_pte_finetuning example from config file #11445

Merged (1 commit) on Jun 7, 2025.

2 changes: 2 additions & 0 deletions examples/llm_pte_finetuning/llama3_config.yaml
@@ -24,6 +24,8 @@ dataset:
seed: null
shuffle: True

+learning_rate: 5e-3
+
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: /tmp/Llama-3.2-1B-Instruct/

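The same top-level `learning_rate: 5e-3` key is added to each of the example configs in this PR. As a quick sanity check that every config the runner might be pointed at actually defines the key, something like the OmegaConf snippet below works; the file list is taken from the paths in this PR, and loading with OmegaConf is an assumption based on how torchtune-style YAML configs are usually consumed.

```python
# Hypothetical sanity check (not part of this PR): confirm each example
# config defines a top-level learning_rate key for the runner to read.
from omegaconf import OmegaConf

CONFIG_PATHS = [
    "examples/llm_pte_finetuning/llama3_config.yaml",
    "examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml",
    "examples/llm_pte_finetuning/phi3_config.yaml",
    "examples/llm_pte_finetuning/qwen_05b_config.yaml",
]

for path in CONFIG_PATHS:
    cfg = OmegaConf.load(path)
    assert "learning_rate" in cfg, f"{path} is missing learning_rate"
    print(f"{path}: learning_rate = {cfg.learning_rate}")
```
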
2 changes: 2 additions & 0 deletions examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml
@@ -13,6 +13,8 @@ batch_size: 1
loss:
  _component_: torch.nn.CrossEntropyLoss

+learning_rate: 5e-3
+
model:
  _component_: torchtune.models.phi3.lora_phi3_mini
  lora_attn_modules: ['q_proj', 'v_proj']

2 changes: 2 additions & 0 deletions examples/llm_pte_finetuning/phi3_config.yaml
@@ -12,6 +12,8 @@ batch_size: 1
loss:
  _component_: torch.nn.CrossEntropyLoss

+learning_rate: 5e-3
+
model:
  _component_: torchtune.models.phi3.lora_phi3_mini
  lora_attn_modules: ['q_proj', 'v_proj']

4 changes: 4 additions & 0 deletions examples/llm_pte_finetuning/qwen_05b_config.yaml
@@ -13,12 +13,16 @@ batch_size: 1
loss:
  _component_: torch.nn.CrossEntropyLoss

+learning_rate: 5e-3
+
model:
  _component_: torchtune.models.qwen2.lora_qwen2_0_5b
  lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
  apply_lora_to_mlp: False
  lora_rank: 32
  lora_alpha: 64
+  # lr parameter is not supported by lora_qwen2_0_5b function
+  # lr: 5e-3

checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer

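The commented-out `lr` entry records that the `lora_qwen2_0_5b` builder does not accept a learning-rate argument, so the value stays a top-level config key and is applied by the runner at update time instead. Below is a minimal sketch of what a config-driven update step could look like, assuming a plain SGD-style step over matching (parameter, gradient) tensor pairs; it is illustrative only, not the runner's actual code.

```python
# Illustrative sketch: apply p <- p - lr * g in place, with lr taken from
# the config rather than from the model builder.
import torch


def sgd_step(
    params: list[torch.Tensor],
    grads: list[torch.Tensor],
    learning_rate: float,
) -> None:
    """Plain SGD update applied in place to each parameter tensor."""
    with torch.no_grad():
        for p, g in zip(params, grads):
            p.sub_(learning_rate * g)
```
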
2 changes: 1 addition & 1 deletion examples/llm_pte_finetuning/runner.py
@@ -84,7 +84,7 @@ def main() -> None:
# params run from [param_start, outputs_end]
grad_start = et_mod.run_method("__et_training_gradients_index_forward", [])[0]
param_start = et_mod.run_method("__et_training_parameters_index_forward", [])[0]
-learning_rate = 5e-3
+learning_rate = cfg.learning_rate
f.seek(0)
losses = []
for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps):

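With this change the hard-coded `5e-3` is gone and the learning rate comes from whichever YAML file is passed to the runner, so each example config can set its own value. A sketch of how that lookup resolves, assuming the config is loaded with OmegaConf (the explicit `OmegaConf.load` call here is illustrative, not quoted from runner.py):

```python
from omegaconf import OmegaConf

# Illustrative: load one of the example configs and read the new key.
cfg = OmegaConf.load("examples/llm_pte_finetuning/qwen_05b_config.yaml")
learning_rate = cfg.learning_rate  # 0.005, from the top-level YAML key
print(learning_rate)
```
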
4 changes: 0 additions & 4 deletions examples/llm_pte_finetuning/training_lib.py
@@ -106,10 +106,6 @@ def eval_model(
token_size = tokens.shape[1]
labels_size = labels.shape[1]

-tokens, labels = batch["tokens"], batch["labels"]
-token_size = tokens.shape[1]
-labels_size = labels.shape[1]
-
# Fixed length for now. We need to resize as the input shapes
# should be the same passed as examples to the export function.
if token_size > max_seq_len:

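The four deleted lines were a verbatim duplicate of the `tokens`/`labels` unpacking just above them, so removing them changes no behavior. The surrounding comment refers to resizing each batch to a fixed `max_seq_len` so its shape matches the example inputs used when the program was exported; the snippet below sketches that idea with truncation and zero right-padding for a `[batch, seq_len]` token tensor, as an illustration rather than the file's exact implementation.

```python
import torch
import torch.nn.functional as F


def resize_to_max_seq_len(tokens: torch.Tensor, max_seq_len: int) -> torch.Tensor:
    """Truncate or zero-pad a [batch, seq_len] tensor to exactly max_seq_len."""
    seq_len = tokens.shape[1]
    if seq_len > max_seq_len:
        return tokens[:, :max_seq_len]
    # F.pad pads the last dimension by (left, right) amounts; pad on the right.
    return F.pad(tokens, (0, max_seq_len - seq_len), value=0)
```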