2 files changed: +41 −1

examples/llm_pte_finetuning — new YAML config (all lines added):
```yaml
tokenizer:
  _component_: torchtune.models.qwen2.qwen2_tokenizer
  path: /tmp/Qwen2-0.5B-Instruct/vocab.json
  merges_file: /tmp/Qwen2-0.5B-Instruct/merges.txt
  max_seq_len: 512

dataset:
  _component_: torchtune.datasets.alpaca_cleaned_dataset
seed: null
shuffle: True
batch_size: 1

loss:
  _component_: torch.nn.CrossEntropyLoss

model:
  _component_: torchtune.models.qwen2.lora_qwen2_0_5b
  lora_attn_modules: ['q_proj', 'k_proj', 'v_proj']
  apply_lora_to_mlp: False
  lora_rank: 32
  lora_alpha: 64
  learning_rate: 5e-3

checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  checkpoint_dir: /tmp/Qwen2-0.5B-Instruct
  checkpoint_files: [
    model.safetensors
  ]
  recipe_checkpoint: null
  output_dir: /tmp/qwen_0.5B_ft-output
  model_type: QWEN2
  resume_from_checkpoint: False
  save_adapter_weights_only: False

device: cpu
dtype: fp32

enable_activation_checkpointing: True
compile: False
```
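Since the runner reads the learning rate as `cfg.model.learning_rate` (see the diff below), the config is presumably parsed into a nested object with attribute access. A minimal sketch of loading it with OmegaConf, which torchtune uses for its configs; the filename `qwen_05b_config.yaml` is a placeholder, not taken from this PR:

```python
# Minimal sketch: load the training config with OmegaConf so nested keys
# are reachable via attribute access (e.g. cfg.model.learning_rate).
# The filename below is a placeholder, not the actual file in this PR.
from omegaconf import OmegaConf

cfg = OmegaConf.load("examples/llm_pte_finetuning/qwen_05b_config.yaml")

print(cfg.tokenizer._component_)  # torchtune.models.qwen2.qwen2_tokenizer
print(cfg.model.learning_rate)    # 5e-3 (replaces the hard-coded value)
print(cfg.model.lora_rank)        # 32
```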
The second changed file (its path is not shown in this capture) swaps the hard-coded learning rate for the value from the config:

```diff
@@ -84,7 +84,7 @@ def main() -> None:
     # params run from [param_start, outputs_end]
     grad_start = et_mod.run_method("__et_training_gradients_index_forward", [])[0]
     param_start = et_mod.run_method("__et_training_parameters_index_forward", [])[0]
-    learning_rate = 5e-3
+    learning_rate = cfg.model.learning_rate
     f.seek(0)
     losses = []
     for i, batch in tqdm(enumerate(train_dataloader), total=num_training_steps):
```
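The two `run_method` calls above fetch the indices where gradients and parameters begin in the flattened forward outputs, and the learning rate now comes from the config. A hedged sketch of how such indices typically drive a manual SGD step — `out`, `sgd_step`, and the slicing convention are assumptions for illustration, not code from this PR:

```python
import torch

# Hedged sketch: apply one manual SGD step using the gradient/parameter
# index convention above. `out` is assumed to be the flat sequence of
# tensors returned by the exported training forward.
def sgd_step(out, grad_start, param_start, learning_rate):
    grads = out[grad_start:param_start]   # gradients occupy [grad_start, param_start)
    params = out[param_start:]            # parameters occupy [param_start, end)
    with torch.no_grad():
        for param, grad in zip(params, grads):
            param.sub_(learning_rate * grad)  # in-place update: p -= lr * g
```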