pytorch · metascroy · May 2, 2025 · May 2, 2025 · May 2, 2025 · May 2, 2025
@@ -25,4 +25,6 @@ You are a helpful assistant.
 """
 
     public static let llama3PromptTemplate = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>%@<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
+
+public static let phi4PromptTemplate = "<|user|>%@<|end|><|assistant|>"
 }
@@ -86,6 +86,7 @@ struct ContentView: View {
     case llama
     case llava
     case qwen3
+    case phi4
 
     static func fromPath(_ path: String) -> ModelType {
       let filename = (path as NSString).lastPathComponent.lowercased()
@@ -95,8 +96,10 @@ struct ContentView: View {
         return .llava
       } else if filename.hasPrefix("qwen3") {
         return .qwen3
+      } else if filename.hasPrefix("phi4") {
+        return .phi4
       }
-      print("Unknown model type in path: \(path). Model filename should start with one of: llama, llava, or qwen3")
+      print("Unknown model type in path: \(path). Model filename should start with one of: llama, llava, qwen3, or phi4")
       exit(1)
     }
   }
@@ -343,15 +346,15 @@ struct ContentView: View {
       }
 
       switch modelType {
-      case .llama, .qwen3:
+      case .llama, .qwen3, .phi4:
         runnerHolder.runner = runnerHolder.runner ?? Runner(modelPath: modelPath, tokenizerPath: tokenizerPath)
       case .llava:
         runnerHolder.llavaRunner = runnerHolder.llavaRunner ?? LLaVARunner(modelPath: modelPath, tokenizerPath: tokenizerPath)
       }
 
       guard !shouldStopGenerating else { return }
       switch modelType {
-      case .llama, .qwen3:
+      case .llama, .qwen3, .phi4:
         if let runner = runnerHolder.runner, !runner.isLoaded() {
           var error: Error?
           let startLoadTime = Date()
@@ -474,12 +477,14 @@ struct ContentView: View {
             prompt = String(format: Constants.llama3PromptTemplate, text)
           case .llava:
             prompt = String(format: Constants.llama3PromptTemplate, text)
+          case .phi4:
+              prompt = String(format: Constants.phi4PromptTemplate, text)
           }
 
           try runnerHolder.runner?.generate(prompt, sequenceLength: seq_len) { token in
 
             if token != prompt {
-              if token == "<|eot_id|>" {
+                if token == "<|eot_id|>" {
                 // hack to fix the issue that extension/llm/runner/text_token_generator.h
                 // keeps generating after <|eot_id|>
                 shouldStopShowingToken = true

@@ -32,12 +32,12 @@ Download already exported LLaMA/LLaVA models along with tokenizers from [Hugging
     ```bash
     open examples/demo-apps/apple_ios/LLaMA/LLaMA.xcodeproj
     ```
-    
+
 3. Click the Play button to launch the app in the Simulator.
 
 4. To run on a device, ensure you have it set up for development and a provisioning profile with the `increased-memory-limit` entitlement. Update the app's bundle identifier to match your provisioning profile with the required capability.
 
-5. After successfully launching the app, copy the exported ExecuTorch model (`.pte`) and tokenizer (`.model`) files to the iLLaMA folder. Three models are currently supported at the moment - Llama, Qwen3, and Llava multimodal. Please ensure that your model `.pte` file starts with `llama`, `qwen3`, or `llava` so that the app selects the correct model type.
+5. After successfully launching the app, copy the exported ExecuTorch model (`.pte`) and tokenizer (`.model`) files to the iLLaMA folder. Four models are currently supported: Llama, Qwen3, Phi4-mini, and Llava multimodal. Please ensure that your model `.pte` file name starts with `llama`, `qwen3`, `phi4`, or `llava` so that the app selects the correct model type.
 
     - **For the Simulator:** Drag and drop both files onto the Simulator window and save them in the `On My iPhone > iLLaMA` folder.
     - **For a Device:** Open a separate Finder window, navigate to the Files tab, drag and drop both files into the iLLaMA folder, and wait for the copying to finish.
-Original file line number
+Diff line change
@@ Expand Up / @@ -25,4 +25,6 @@ You are a helpful assistant. @@
     """
         public static let llama3PromptTemplate = "<|begin_of_text|><|start_header_id|>user<|end_header_id|>%@<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
+    public static let phi4PromptTemplate = "<|user|>%@<|end|><|assistant|>"
     }