convert: remove most of the n_mult usage in convert.py #3098

Merged: 3 commits, Sep 10, 2023

convert.py: 18 changes (1 addition & 17 deletions)

--- a/convert.py
+++ b/convert.py
@@ -145,7 +145,6 @@ def type_for_tensor(self, name: str, tensor: LazyTensor) -> DataType:
 class Params:
     n_vocab: int
     n_embd: int
-    n_mult: int
     n_layer: int
     n_ctx: int
     n_ff: int
@@ -161,15 +160,6 @@ class Params:
     # path to the directory containing the model files
     path_model: Path | None = None
 
-    @staticmethod
-    def find_n_mult(n_ff: int, n_embd: int) -> int:
-        # hardcoded magic range
-        for n_mult in range(8192, 1, -1):
-            calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
-            if calc_ff == n_ff:
-                return n_mult
-        raise Exception(f"failed to find n_mult for (n_ff={n_ff}, n_embd={n_embd}).")
-
     @staticmethod
     def guessed(model: LazyModel) -> Params:
         # try transformer naming first
@@ -197,7 +187,6 @@ def guessed(model: LazyModel) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = -1,
             n_ff = n_ff,
@@ -225,8 +214,6 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         else:
             f_rope_scale = None
 
-        n_mult = Params.find_n_mult(n_ff, n_embd)
-
         if "max_sequence_length" in config:
             n_ctx = config["max_sequence_length"]
         elif "max_position_embeddings" in config:
@@ -238,7 +225,6 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = n_ctx,
             n_ff = n_ff,
@@ -250,15 +236,14 @@ def loadHFTransformerJson(model: LazyModel, config_path: Path) -> Params:
         )
 
     # LLaMA v2 70B params.json
-    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1
+    # {"dim": 8192, "multiple_of": 4096, "ffn_dim_multiplier": 1.3, "n_heads": 64, "n_kv_heads": 8, "n_layers": 80, "norm_eps": 1e-05, "vocab_size": -1}
     @staticmethod
     def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         config = json.load(open(config_path))
 
         n_vocab = config["vocab_size"] if "vocab_size" in config else -1
         n_embd = config["dim"]
         n_layer = config["n_layers"]
-        n_mult = config["multiple_of"]
         n_ff = -1
         n_head = config["n_heads"]
         n_head_kv = config["n_kv_heads"] if "n_kv_heads" in config else n_head
@@ -285,7 +270,6 @@ def loadOriginalParamsJson(model: LazyModel, config_path: Path) -> Params:
         return Params(
             n_vocab = n_vocab,
             n_embd = n_embd,
-            n_mult = n_mult,
             n_layer = n_layer,
             n_ctx = n_ctx,
             n_ff = n_ff,
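
For reference, the deleted `find_n_mult` helper brute-forced LLaMA's `multiple_of` rounding parameter back out of `n_ff` and `n_embd`. Below is a minimal sketch of the rounding it was trying to invert, not code from this PR; the concrete numbers are assumed LLaMA-7B-like values used only for illustration.

```python
# Sketch (not part of the PR): the FFN-width rounding that the removed
# find_n_mult helper tried to invert by brute force.

def rounded_ffn_size(n_embd: int, n_mult: int) -> int:
    # Same formula as the deleted code: round (8/3)*n_embd up to a multiple of n_mult.
    base = (8 * n_embd) // 3
    return ((base + n_mult - 1) // n_mult) * n_mult

n_embd = 4096       # assumed LLaMA-7B model dim
multiple_of = 256   # assumed "multiple_of" from the original params.json
print(rounded_ffn_size(n_embd, multiple_of))  # 11008, the FFN width found in the checkpoint
```

The deleted helper walked `n_mult` down from 8192 and returned the first value that reproduced `n_ff`. As the diff shows, `Params` no longer carries `n_mult` at all, so nothing downstream needs that recovered value, and the "hardcoded magic range" heuristic can go away.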