derive tokenizer type from model configuration #409

Closed
wants to merge 1 commit into from
79 changes: 46 additions & 33 deletions build/builder.py
@@ -139,25 +139,21 @@ def from_speculative_args(cls, args): # -> BuilderArgs:
@dataclass
class TokenizerArgs:
tokenizer_path: Optional[Union[Path, str]] = None
is_sentencepiece: bool = True
is_sentencepiece: bool = False
is_tiktoken: bool = False

def validate_model(
def update_from_model(
self,
model: Transformer,
model_description: str = "model",
):
if model is None:
) -> None:
if not (self.is_sentencepiece or self.tiktoken):
Contributor

Should self.tiktoken -> self.is_tiktoken? (from definition on line 143)

Contributor Author
@mikekgfb Apr 23, 2024

Yeah, but I’m not liking it anyway; I’ll try another way. This one just turns what we have into one horrible mess because it combines everything with everything. That being said, config_only is here to stay.

Contributor Author

So if you’re going to get called with that, you can just return an empty model without loading anything other than configs.

Can we get trustworthy information about whether a model wants tiktoken or SentencePiece from the GGUF file?

Contributor

It looks like we may be able to extract the name of the tokenizer model: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#tokenizer
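
The GGUF key/value metadata does carry this: the tokenizer family is recorded under tokenizer.ggml.model (e.g. "llama" for SentencePiece-style models, "gpt2" for BPE). Below is a minimal sketch of reading that key, assuming the gguf Python package from the llama.cpp repository and its GGUFReader API; the helper name and the byte-decoding details are assumptions of this sketch, not code from this PR.

# Hypothetical helper (not part of this PR): read tokenizer.ggml.model from a
# GGUF file. Assumes the `gguf` package's GGUFReader exposes a `fields` dict
# whose string fields keep their raw bytes in `parts`, indexed through `data`.
from gguf import GGUFReader

def gguf_tokenizer_family(gguf_path: str) -> str:
    reader = GGUFReader(gguf_path)
    field = reader.fields.get("tokenizer.ggml.model")
    if field is None:
        raise RuntimeError(f"{gguf_path} has no tokenizer.ggml.model metadata")
    # e.g. "llama" (SentencePiece-style) or "gpt2" (BPE/tiktoken-style)
    return bytes(field.parts[field.data[0]]).decode("utf-8")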

self.tiktoken = model.config.use_tiktoken
self.sentencepiece = not model.config.use_tiktoken
return

use_tiktoken = model.config.use_tiktoken
is_tiktoken = self.is_tiktoken

if use_tiktoken is None:
model.config.use_tiktoken = is_tiktoken
elif use_tiktoken != is_tiktoken:
raise RuntimeError(
f"model-specified tokenizer ({tokenizer_setting_to_name(use_tiktoken)} does not match provided tokenizer ({tokenizer_setting_to_name(is_tiktoken)} for {model_description}"
if not (self.is_tiktoken == model.config.use_tiktoken):
print(
"Warning: command line specified tiktoken, but model expects sentencepiece tokenizer"
)

@classmethod
@@ -197,16 +193,22 @@ def from_args(cls, args): # -> TokenizerArgs:


def _initialize_tokenizer(tokenizer_args: TokenizerArgs):
if tokenizer_args.is_sentencepiece:
from sentencepiece import SentencePieceProcessor
try:
if tokenizer_args.is_tiktoken:
from tokenizer.tiktoken import Tokenizer as TiktokenTokenizer

return SentencePieceProcessor(model_file=str(tokenizer_args.tokenizer_path))
elif tokenizer_args.is_tiktoken:
from tokenizer.tiktoken import Tokenizer as TiktokenTokenizer
return TiktokenTokenizer(model_path=str(tokenizer_args.tokenizer_path))
elif tokenizer_args.is_sentencepiece:
from sentencepiece import SentencePieceProcessor

return TiktokenTokenizer(model_path=str(tokenizer_args.tokenizer_path))
else:
raise RuntimeError("must specify a valid tokenizer in TokenizerArgs")
return SentencePieceProcessor(model_file=str(tokenizer_args.tokenizer_path))
else:
error = "must specify a valid tokenizer in TokenizerArgs"
except Exception as e:
error = str(e)

message = f"Error initializing tokenizer. Did you specify the right tokenizer path and type? Original error: {error}"
raise RuntimeError(message)


torch._inductor.config.coordinate_descent_tuning = True
Expand Down Expand Up @@ -237,7 +239,7 @@ def _unset_gguf_kwargs(builder_args):
builder_args.gguf_kwargs = None


def _load_model_gguf(builder_args):
def _load_model_gguf(builder_args, config_only=False):
assert builder_args.gguf_path
if builder_args.gguf_kwargs is None:
kwargs = {}
@@ -247,7 +249,7 @@ def _load_model_gguf(builder_args):
return model


def _load_model_default(builder_args):
def _load_model_default(builder_args, config_only=False):
assert not builder_args.gguf_path

with torch.device("meta"):
@@ -300,11 +302,11 @@ def _load_model_default(builder_args):
return model


def _load_model(builder_args):
def _load_model(builder_args, config_only=False):
if builder_args.gguf_path:
model = _load_model_gguf(builder_args)
model = _load_model_gguf(builder_args, config_only)
else:
model = _load_model_default(builder_args)
model = _load_model_default(builder_args, config_only)

if builder_args.use_tp:
from tp import apply_tp
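
As noted in the review thread above, the config_only flag now threaded through _load_model is meant to let a caller obtain the model architecture without loading any weights. A hedged sketch of what such a path could look like, assuming the Transformer and ModelArgs classes in build/model.py; the helper and the from_name lookup are illustrations, not part of this diff.

# Hypothetical sketch (not in this PR): build the architecture on the meta
# device and skip reading checkpoint weights entirely.
import torch

def _load_model_config_only(builder_args):
    from build.model import ModelArgs, Transformer

    # Assumed lookup of a known config by name; the real resolution may differ.
    config = ModelArgs.from_name(builder_args.checkpoint_path.parent.name)
    with torch.device("meta"):
        model = Transformer(config)  # no parameters materialized
    return model
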
@@ -318,6 +320,7 @@ def _load_model(builder_args):

def _initialize_model(
builder_args,
tokenizer_args,
quantize,
tokenizer=None,
):
@@ -339,10 +342,19 @@ def _initialize_model(
device_sync(device=builder_args.device)
print(f"Time to load model: {time.time() - t0:.02f} seconds")

if not tokenizer:
try:
tokenizer_args.update_from_model(model_)
tokenizer = _initialize_tokenizer(tokenizer_args)
except:
tokenizer = None

if builder_args.dso_path:
assert (
quantize is None or quantize == "{ }"
), "quantize not valid for exported DSO model. Specify quantization during export."
if not (quantize is None or quantize == "{ }"):
raise RuntimeError(
"quantize not valid for exported DSO model. Specify quantization during export."
)

try:
model = model_
# Replace model forward with the AOT-compiled forward
@@ -357,9 +369,10 @@ def _initialize_model(
except:
raise RuntimeError(f"Failed to load AOTI compiled {builder_args.dso_path}")
elif builder_args.pte_path:
assert (
quantize is None or quantize == "{ }"
), "quantize not valid for exported PTE model. Specify quantization during export."
if not (quantize is None or quantize == "{ }"):
raise RuntimeError(
"quantize not valid for exported PTE model. Specify quantization during export."
)
try:
from build.model_et import PTEModel

@@ -383,7 +396,7 @@ def _initialize_model(

model.to(dtype=builder_args.precision)

return model
return model, tokenizer


def tokenizer_setting_to_name(tiktoken: bool = False) -> str:
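
Taken together, builder.py now resolves the tokenizer type from the loaded model's configuration and hands the tokenizer back alongside the model. A short sketch of the resulting calling pattern, mirroring the generate.py changes further below; BuilderArgs.from_args and the surrounding args/quantize values are assumed from the rest of the codebase, and error handling is elided.

# Sketch of the calling convention established by this PR.
builder_args = BuilderArgs.from_args(args)      # assumed, by analogy with TokenizerArgs.from_args
tokenizer_args = TokenizerArgs.from_args(args)

model, tokenizer = _initialize_model(
    builder_args,
    tokenizer_args,
    quantize,
)
tokenizer_args.update_from_model(model)
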
2 changes: 1 addition & 1 deletion build/model.py
@@ -36,7 +36,7 @@ class ModelArgs:
norm_eps: float = 1e-5
multiple_of: int = 256
ffn_dim_multiplier: Optional[int] = None
use_tiktoken: Optional[bool] = None
use_tiktoken: bool = False

def __post_init__(self):
if self.n_local_heads == -1:
12 changes: 8 additions & 4 deletions build/utils.py
@@ -5,10 +5,11 @@
# LICENSE file in the root directory of this source tree.

from __future__ import annotations
from typing import List
from pathlib import Path
import os

import logging
import os
from pathlib import Path
from typing import List

import torch

@@ -83,9 +84,11 @@ def name_to_dtype(name):
else:
raise RuntimeError(f"unsupported dtype name {name} specified")


def allowable_dtype_names() -> List[str]:
return name_to_dtype_dict.keys()


name_to_dtype_dict = {
"fp32": torch.float,
"fp16": torch.float16,
@@ -101,7 +104,8 @@ def allowable_dtype_names() -> List[str]:
#########################################################################
### general model build utility functions for CLI ###

def allowable_params_table() -> List[dtr]:

def allowable_params_table() -> List[str]:
config_path = Path(f"{str(Path(__file__).parent)}/known_model_params")
known_model_params = [
config.replace(".json", "") for config in os.listdir(config_path)
6 changes: 3 additions & 3 deletions cli.py
@@ -7,11 +7,11 @@
import json
from pathlib import Path

import torch

from build.utils import allowable_dtype_names, allowable_params_table
from download import download_and_convert, is_model_downloaded

import torch

# CPU is always available and also exportable to ExecuTorch
default_device = "cpu" # 'cuda' if torch.cuda.is_available() else 'cpu'

@@ -223,7 +223,7 @@ def add_arguments(parser):
"-d",
"--dtype",
default="float32",
choices = allowable_dtype_names(),
choices=allowable_dtype_names(),
help="Override the dtype of the model (default is the checkpoint dtype). Options: bf16, fp16, fp32",
)
parser.add_argument(
6 changes: 2 additions & 4 deletions eval.py
@@ -232,14 +232,12 @@ def main(args) -> None:
print(f"Using device={device}")
set_precision(builder_args.precision)

tokenizer = _initialize_tokenizer(tokenizer_args)
builder_args.setup_caches = False
model = _initialize_model(
model, tokenizer = _initialize_model(
builder_args,
tokenizer_args,
quantize,
tokenizer,
)
tokenizer_args.validate_model(model)

if compile:
assert not (
38 changes: 26 additions & 12 deletions export.py
@@ -48,44 +48,58 @@ def main(args):
output_pte_path = args.output_pte_path
output_dso_path = args.output_dso_path

# tokenizer needed for quantization so get that here,
try:
tokenizer_args = TokenizerArgs.from_args(args)
except:
tokenizer_args = None

# This is heroic, but still confusing to the user
# because they think they are in fact exporting the
# same model when they're not. In set_backend we just
# throw an exception if the user asks for both, but
# we need to know which one.
model_to_pte = None
model_to_dso = None

# TODO: clean this up
# This mess is because ET does not support _weight_int4pack_mm right now
if not builder_args.gguf_path:
# tokenizer needed for quantization so get that here,
try:
tokenizer_args = TokenizerArgs.from_args(args)
tokenizer = _initialize_tokenizer(tokenizer_args)
except:
tokenizer = None

model = _initialize_model(
model, tokenizer = _initialize_model(
builder_args,
tokenizer_args,
quantize,
tokenizer,
)

model_to_pte = model
model_to_dso = model
else:
# for now, we simply don't support GPTQ for GGUF
if output_pte_path:
print(
"Warning: may not be able to represent quantized models, dequantizing when necessary"
)
_set_gguf_kwargs(builder_args, is_et=True, context="export")
model_to_pte = _initialize_model(
model_to_pte, tokenizer = _initialize_model(
builder_args,
tokenizer_args,
quantize,
)
_unset_gguf_kwargs(builder_args)

if output_dso_path:
_set_gguf_kwargs(builder_args, is_et=False, context="export")
model_to_dso = _initialize_model(
model_to_dso, tokenizer = _initialize_model(
builder_args,
tokenizer_args,
quantize,
)
_unset_gguf_kwargs(builder_args)

with torch.no_grad():
if output_pte_path:
output_pte_path = str(os.path.abspath(output_pte_path))
print(f">{output_pte_path}<")
# print(f">{output_pte_path}<")
if executorch_export_available:
print(f"Exporting model using ExecuTorch to {output_pte_path}")
export_model_et(
16 changes: 10 additions & 6 deletions generate.py
@@ -428,24 +428,28 @@ def _main(
)
# raise RuntimeError("You need to use --is-chat-model to indicate model has chat support.")

tokenizer = _initialize_tokenizer(tokenizer_args)

builder_args.setup_caches = False
model = _initialize_model(builder_args, quantize, tokenizer)
model, tokenizer = _initialize_model(
builder_args,
tokenizer_args,
quantize,
)
tokenizer_args.update_from_model(model)

# will add a version of _initialize_model in future
# (need additional args)
if is_speculative:
draft_model = _initialize_model(
draft_model, _ = _initialize_model(
speculative_builder_args,
tokenizer_args,
quantize if draft_quantize == "quantize" else draft_quantize,
tokenizer,
)
else:
draft_model = None

tokenizer_args.validate_model(model)
tokenizer_args.validate_model(draft_model, "draft model")
# tokenizer_args.validate_model(model)
# tokenizer_args.validate_model(draft_model, "draft model")
generator_args.validate_build(builder_args)
generator_args.validate_build(speculative_builder_args, "draft model")
