
Commit cf0bfd2

Introduce TextLLMRunner. (#12055)
Summary: .

Reviewed By: mergennachin

Differential Revision: D77416842
1 parent 4374afe commit cf0bfd2

7 files changed: +218 −56 lines changed
.lintrunner.toml

Lines changed: 1 addition & 0 deletions

@@ -76,6 +76,7 @@ exclude_patterns = [
   'examples/demo-apps/apple_ios/**',
   'examples/demo-apps/react-native/rnllama/ios/**',
   'extension/apple/**',
+  'extension/llm/apple/**',
   # File contains @generated
   'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
   'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.h

Lines changed: 7 additions & 9 deletions

@@ -10,18 +10,16 @@

 NS_ASSUME_NONNULL_BEGIN

-FOUNDATION_EXPORT NSErrorDomain const LLaMARunnerErrorDomain;
-
 @interface LLaMARunner : NSObject

-- (instancetype)initWithModelPath:(NSString*)filePath
-                    tokenizerPath:(NSString*)tokenizerPath;
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath;
 - (BOOL)isLoaded;
-- (BOOL)loadWithError:(NSError**)error;
-- (BOOL)generate:(NSString*)prompt
-    sequenceLength:(NSInteger)seq_len
- withTokenCallback:(nullable void (^)(NSString*))callback
-             error:(NSError**)error;
+- (BOOL)loadWithError:(NSError **)error;
+- (BOOL)generate:(NSString *)prompt
+    sequenceLength:(NSInteger)seq_len
+ withTokenCallback:(nullable void (^)(NSString *))callback
+             error:(NSError **)error;
 - (void)stop;

 + (instancetype)new NS_UNAVAILABLE;

examples/demo-apps/apple_ios/LLaMA/LLaMARunner/LLaMARunner/Exported/LLaMARunner.mm

Lines changed: 23 additions & 47 deletions

@@ -9,33 +9,29 @@
 #import "LLaMARunner.h"

 #import <ExecuTorch/ExecuTorchLog.h>
-#import <executorch/extension/llm/runner/text_llm_runner.h>
+#import <ExecuTorchLLM/ExecuTorchLLM.h>
 #import <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

-using namespace executorch::extension;
-using namespace executorch::runtime;
-
-NSErrorDomain const LLaMARunnerErrorDomain = @"LLaMARunnerErrorDomain";
-
 @interface LLaMARunner ()<ExecuTorchLogSink>
 @end

 @implementation LLaMARunner {
-  std::unique_ptr<llm::TextLLMRunner> _runner;
+  ExecuTorchTextLLMRunner *_runner;
 }

-- (instancetype)initWithModelPath:(NSString*)modelPath
-                    tokenizerPath:(NSString*)tokenizerPath {
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath {
   self = [super init];
   if (self) {
     [ExecuTorchLog.sharedLog addSink:self];
-    _runner = llm::create_text_llm_runner(
-        modelPath.UTF8String,
-        llm::load_tokenizer(
-            tokenizerPath.UTF8String,
-            example::get_special_tokens(example::Version::Default)
-        )
-    );
+    auto tokens = example::get_special_tokens(example::Version::Default);
+    NSMutableArray<NSString*> *specialTokens = [[NSMutableArray alloc] initWithCapacity:tokens->size()];
+    for (const auto &token : *tokens) {
+      [specialTokens addObject:(NSString *)@(token.c_str())];
+    }
+    _runner = [[ExecuTorchTextLLMRunner alloc] initWithModelPath:modelPath
+                                                   tokenizerPath:tokenizerPath
+                                                   specialTokens:specialTokens];
   }
   return self;
 }
@@ -45,45 +41,25 @@ - (void)dealloc {
 }

 - (BOOL)isLoaded {
-  return _runner->is_loaded();
+  return [_runner isLoaded];
 }

 - (BOOL)loadWithError:(NSError**)error {
-  const auto status = _runner->load();
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
+  return [_runner loadWithError:error];
 }

-- (BOOL)generate:(NSString*)prompt
-    sequenceLength:(NSInteger)seq_len
- withTokenCallback:(nullable void (^)(NSString*))callback
-             error:(NSError**)error {
-  const auto status = _runner->generate(
-      prompt.UTF8String,
-      llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
-      [callback](const std::string& token) {
-        callback(@(token.c_str()));
-      });
-  if (status != Error::Ok) {
-    if (error) {
-      *error = [NSError errorWithDomain:LLaMARunnerErrorDomain
-                                   code:(NSInteger)status
-                               userInfo:nil];
-    }
-    return NO;
-  }
-  return YES;
+- (BOOL)generate:(NSString *)prompt
+    sequenceLength:(NSInteger)seq_len
+ withTokenCallback:(nullable void (^)(NSString *))callback
+             error:(NSError **)error {
+  return [_runner generate:prompt
+            sequenceLength:seq_len
+         withTokenCallback:callback
+                     error:error];
 }

 - (void)stop {
-  _runner->stop();
+  [_runner stop];
 }

 #pragma mark - ExecuTorchLogSink
Lines changed: 9 additions & 0 deletions

@@ -0,0 +1,9 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchTextLLMRunner.h"
Lines changed: 74 additions & 0 deletions

@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import <Foundation/Foundation.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+FOUNDATION_EXPORT NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain;
+
+/**
+ A wrapper class for the C++ llm::TextLLMRunner that provides
+ Objective-C APIs to load models, manage tokenization with custom
+ special tokens, generate text sequences, and stop the runner.
+ */
+NS_SWIFT_NAME(TextLLMRunner)
+__attribute__((deprecated("This API is experimental.")))
+@interface ExecuTorchTextLLMRunner : NSObject
+
+/**
+ Initializes a text LLM runner with the given model and tokenizer paths,
+ and a list of special tokens to include in the tokenizer.
+
+ @param modelPath File system path to the serialized model.
+ @param tokenizerPath File system path to the tokenizer data.
+ @param tokens An array of NSString special tokens to use during tokenization.
+ @return An initialized ExecuTorchTextLLMRunner instance.
+ */
+- (instancetype)initWithModelPath:(NSString *)modelPath
+                    tokenizerPath:(NSString *)tokenizerPath
+                    specialTokens:(NSArray<NSString *> *)tokens;
+
+/**
+ Checks whether the underlying model has been successfully loaded.
+
+ @return YES if the model is loaded, NO otherwise.
+ */
+- (BOOL)isLoaded;
+
+/**
+ Loads the model into memory, returning an error if loading fails.
+
+ @param error On failure, populated with an NSError explaining the issue.
+ @return YES if loading succeeds, NO if an error occurred.
+ */
+- (BOOL)loadWithError:(NSError **)error;
+
+/**
+ Generates text given an input prompt, up to a specified sequence length.
+ Invokes the provided callback for each generated token.
+
+ @param prompt The initial text prompt to generate from.
+ @param seq_len The maximum number of tokens to generate.
+ @param callback A block called with each generated token as an NSString.
+ @param error On failure, populated with an NSError explaining the issue.
+ @return YES if generation completes successfully, NO if an error occurred.
+ */
+- (BOOL)generate:(NSString *)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString *))callback
+           error:(NSError **)error;
+
+/**
+ Stops any ongoing generation and cleans up internal resources.
+ */
+- (void)stop;
+
+@end
+
+NS_ASSUME_NONNULL_END
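
The header above is the complete public surface of the new wrapper. Below is a minimal Objective-C usage sketch that is not part of this commit: the model path, tokenizer path, special tokens, and the surrounding function are hypothetical placeholders.

#import <ExecuTorchLLM/ExecuTorchLLM.h>

static void RunTextLLMRunnerExample(void) {
  // Hypothetical paths and special tokens, for illustration only.
  NSArray<NSString *> *specialTokens = @[ @"<|begin_of_text|>", @"<|end_of_text|>" ];
  ExecuTorchTextLLMRunner *runner =
      [[ExecuTorchTextLLMRunner alloc] initWithModelPath:@"/path/to/model.pte"
                                            tokenizerPath:@"/path/to/tokenizer.model"
                                            specialTokens:specialTokens];
  NSError *error = nil;
  // An explicit loadWithError: call is optional: in the implementation added
  // below, generate: calls loadWithError: first, so the model loads lazily.
  BOOL ok = [runner generate:@"Once upon a time"
              sequenceLength:128
           withTokenCallback:^(NSString *token) {
             NSLog(@"%@", token); // each generated token streams back here
           }
                       error:&error];
  if (!ok) {
    NSLog(@"Generation failed: %@", error);
  }
}

From Swift, the class is exposed as TextLLMRunner via NS_SWIFT_NAME.
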
Lines changed: 102 additions & 0 deletions

@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#import "ExecuTorchTextLLMRunner.h"
+
+#import <executorch/extension/llm/runner/text_llm_runner.h>
+
+using namespace executorch::extension;
+using namespace executorch::runtime;
+
+NSErrorDomain const ExecuTorchTextLLMRunnerErrorDomain = @"ExecuTorchTextLLMRunnerErrorDomain";
+
+@implementation ExecuTorchTextLLMRunner {
+  NSString *_modelPath;
+  NSString *_tokenizerPath;
+  std::unique_ptr<std::vector<std::string>> _specialTokens;
+  std::unique_ptr<llm::TextLLMRunner> _runner;
+}
+
+- (instancetype)initWithModelPath:(NSString*)modelPath
+                    tokenizerPath:(NSString*)tokenizerPath
+                    specialTokens:(NSArray<NSString*>*)tokens {
+  self = [super init];
+  if (self) {
+    _modelPath = [modelPath copy];
+    _tokenizerPath = [tokenizerPath copy];
+    _specialTokens = std::make_unique<std::vector<std::string>>();
+    for (NSString *token in tokens) {
+      _specialTokens->emplace_back(token.UTF8String);
+    }
+  }
+  return self;
+}
+
+- (BOOL)isLoaded {
+  return _runner && _runner->is_loaded();
+}
+
+- (BOOL)loadWithError:(NSError**)error {
+  if (![self isLoaded]) {
+    _runner = llm::create_text_llm_runner(
+      _modelPath.UTF8String,
+      llm::load_tokenizer(_tokenizerPath.UTF8String, std::move(_specialTokens))
+    );
+    if (!_runner) {
+      if (error) {
+        *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                     code:-1
+                                 userInfo:@{NSLocalizedDescriptionKey: @"Failed to create runner"}];
+      }
+      return NO;
+    }
+  }
+  auto status = _runner->load();
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+- (BOOL)generate:(NSString*)prompt
+  sequenceLength:(NSInteger)seq_len
+withTokenCallback:(nullable void (^)(NSString*))callback
+           error:(NSError**)error {
+  if (![self loadWithError:error]) {
+    return NO;
+  }
+  auto status = _runner->generate(
+    prompt.UTF8String,
+    llm::GenerationConfig{.seq_len = static_cast<int32_t>(seq_len)},
+    [callback](const std::string& token) {
+      if (callback) callback(@(token.c_str()));
+    }
+  );
+  if (status != Error::Ok) {
+    if (error) {
+      *error = [NSError errorWithDomain:ExecuTorchTextLLMRunnerErrorDomain
+                                   code:(NSInteger)status
+                               userInfo:nil];
+    }
+    return NO;
+  }
+  return YES;
+}
+
+- (void)stop {
+  if (_runner) {
+    _runner->stop();
+  }
+}
+
+@end

extension/llm/runner/text_llm_runner.cpp

Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,7 @@

 #include <executorch/extension/llm/runner/text_llm_runner.h>
 #include <executorch/extension/llm/runner/util.h>
+#include <executorch/runtime/platform/runtime.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
 #include <pytorch/tokenizers/llama2c_tokenizer.h>
 #include <pytorch/tokenizers/sentencepiece.h>
@@ -256,6 +257,7 @@ std::unique_ptr<tokenizers::Tokenizer> load_tokenizer(
     std::optional<std::string> pattern,
     size_t bos_token_index,
     size_t eos_token_index) {
+  runtime::runtime_init();
  auto json_tokenizer = std::make_unique<tokenizers::HFTokenizer>();
  if (json_tokenizer->load(tokenizer_path) == ::tokenizers::Error::Ok) {
    ET_LOG(Info, "Loaded json tokenizer");