
Commit ae6b1f1

Starter Task 1: Get learning rate for llm_pte_finetuning example from config file
Differential Revision: D75807517
Pull Request resolved: #11445
1 parent e5b8735 commit ae6b1f1

File tree: 6 files changed (+11, −5 lines)

examples/llm_pte_finetuning/llama3_config.yaml

Lines changed: 2 additions & 0 deletions

@@ -24,6 +24,8 @@ dataset:
 seed: null
 shuffle: True
 
+learning_rate: 5e-3
+
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
   checkpoint_dir: /tmp/Llama-3.2-1B-Instruct/

examples/llm_pte_finetuning/phi3_alpaca_code_config.yaml

Lines changed: 2 additions & 0 deletions

@@ -13,6 +13,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']

examples/llm_pte_finetuning/phi3_config.yaml

Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,8 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.phi3.lora_phi3_mini
   lora_attn_modules: ['q_proj', 'v_proj']

examples/llm_pte_finetuning/qwen_05b_config.yaml

Lines changed: 4 additions & 0 deletions

@@ -13,12 +13,16 @@ batch_size: 1
 loss:
   _component_: torch.nn.CrossEntropyLoss
 
+learning_rate: 5e-3
+
 model:
   _component_: torchtune.models.qwen2.lora_qwen2_0_5b
   lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
   apply_lora_to_mlp: False
   lora_rank: 32
   lora_alpha: 64
+  # lr parameter is not supported by lora_qwen2_0_5b function
+  # lr: 5e-3
 
 checkpointer:
   _component_: torchtune.training.FullModelHFCheckpointer
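
The new learning_rate key is a top-level entry in each of these YAML files; as the commented-out lr line in the Qwen config notes, it is not passed to the model builder, so the runner reads it straight off the parsed config instead. Below is a minimal sketch of that lookup, assuming the configs are loaded with OmegaConf as torchtune-style YAML configs commonly are; the file path is illustrative.

from omegaconf import OmegaConf

# Load one of the example configs (path shown for illustration only).
cfg = OmegaConf.load("examples/llm_pte_finetuning/qwen_05b_config.yaml")

# Top-level keys are exposed as attributes on the resulting DictConfig,
# so the learning rate is read the same way as any other hyperparameter.
learning_rate = cfg.learning_rate  # 5e-3 from the YAML above
print(f"learning rate: {learning_rate}")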

examples/llm_pte_finetuning/runner.py

Lines changed: 1 addition & 1 deletion

@@ -84,7 +84,7 @@ def main() -> None:
     # params run from [param_start, outputs_end]
     grad_start = et_mod.run_method("__et_training_gradients_index_forward", [])[0]
     param_start = et_mod.run_method("__et_training_parameters_index_forward", [])[0]
-    learning_rate = 5e-3
+    learning_rate = cfg.learning_rate
     f.seek(0)
     losses = []
     for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps):
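
With the hardcoded 5e-3 removed, the training loop uses whatever learning rate the config specifies. The surrounding gradient/parameter index lookups suggest the value feeds a plain SGD-style update (param ← param − lr · grad); the snippet below is an illustrative sketch of that update rule, assuming matching lists of parameters and gradients, not the example's exact code.

import torch

def sgd_step(params, grads, learning_rate):
    # Manual SGD update: param <- param - learning_rate * grad.
    with torch.no_grad():
        for param, grad in zip(params, grads):
            param.sub_(learning_rate * grad)

# Illustrative usage with dummy tensors.
params = [torch.ones(3, requires_grad=True)]
grads = [torch.full((3,), 2.0)]
sgd_step(params, grads, learning_rate=5e-3)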

examples/llm_pte_finetuning/training_lib.py

Lines changed: 0 additions & 4 deletions

@@ -106,10 +106,6 @@ def eval_model(
         token_size = tokens.shape[1]
         labels_size = labels.shape[1]
 
-        tokens, labels = batch["tokens"], batch["labels"]
-        token_size = tokens.shape[1]
-        labels_size = labels.shape[1]
-
         # Fixed length for now. We need to resize as the input shapes
         # should be the same passed as examples to the export function.
         if token_size > max_seq_len:
