Skip to content

refactor: Use LlmConfig for model parameters instead of kwargs #11168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions examples/models/llama/export_llama_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1229,15 +1229,7 @@ def _load_llama_model(
checkpoint=checkpoint,
checkpoint_dir=checkpoint_dir,
params=params_path,
use_kv_cache=use_kv_cache,
use_sdpa_with_kv_cache=use_sdpa_with_kv_cache,
generate_full_logits=generate_full_logits,
fairseq2=weight_type == WeightType.FAIRSEQ2,
max_seq_len=max_seq_len,
max_context_len=max_context_len,
enable_dynamic_shape=enable_dynamic_shape,
input_prune_map_path=input_prune_map_path,
output_prune_map_path=output_prune_map_path,
dtype=torch_dtype,
llm_config=llm_config,
)
Expand Down
35 changes: 25 additions & 10 deletions examples/models/llama/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,30 @@ def __init__(self, **kwargs):
# Params file.
params_path = kwargs.get("params", None)

self.use_kv_cache = kwargs.get("use_kv_cache", False)
self.use_sdpa_with_kv_cache_op = kwargs.get("use_sdpa_with_kv_cache", False)
self.generate_full_logits = kwargs.get("generate_full_logits", False)
self.enable_dynamic_shape = kwargs.get("enable_dynamic_shape", False)
self.input_prune_map_path = kwargs.get("input_prune_map_path", None)
self.output_prune_map_path = kwargs.get("output_prune_map_path", None)
self.max_seq_len = kwargs.get("max_seq_len", 128)
self.max_context_len = kwargs.get("max_context_len", 128)
self.llm_config = kwargs.get("llm_config", None)

# Set all parameters from llm_config if available, otherwise use kwargs as fallback
if self.llm_config:
self.use_kv_cache = self.llm_config.model.use_kv_cache
self.use_sdpa_with_kv_cache_op = self.llm_config.model.use_sdpa_with_kv_cache
self.generate_full_logits = self.llm_config.debug.generate_full_logits
self.enable_dynamic_shape = self.llm_config.model.enable_dynamic_shape
self.input_prune_map_path = self.llm_config.model.input_prune_map
self.output_prune_map_path = self.llm_config.model.output_prune_map
self.max_seq_len = self.llm_config.export.max_seq_length
self.max_context_len = self.llm_config.export.max_context_length
self.verbose = self.llm_config.debug.verbose
else:
# Fallback to kwargs for backward compatibility
self.use_kv_cache = kwargs.get("use_kv_cache", False)
self.use_sdpa_with_kv_cache_op = kwargs.get("use_sdpa_with_kv_cache", False)
self.generate_full_logits = kwargs.get("generate_full_logits", False)
self.enable_dynamic_shape = kwargs.get("enable_dynamic_shape", False)
self.input_prune_map_path = kwargs.get("input_prune_map_path", None)
self.output_prune_map_path = kwargs.get("output_prune_map_path", None)
self.max_seq_len = kwargs.get("max_seq_len", 128)
self.max_context_len = kwargs.get("max_context_len", 128)
self.verbose = kwargs.get("verbose", False)

assert (
self.max_context_len >= self.max_seq_len
Expand Down Expand Up @@ -165,7 +180,7 @@ def __init__(self, **kwargs):
if model_name not in ["llama3", "llama3_1"]:
model_args.rope_scale_factor = 32

if kwargs.get("verbose", False):
if self.verbose:
print("============= weights ================")
print("{key} : {weights.numel()} : {weights.size()}")
for key, weights in checkpoint.items():
Expand Down Expand Up @@ -280,7 +295,7 @@ def __init__(self, **kwargs):
f"The provided checkpoint is missing the following weights that are expected by the model: {missing_weights}. Please fix the fqn's in your checkpoint to match."
)
if unexpected:
if kwargs.get("verbose", False):
if self.verbose:
print(f"Unexpected keys: {unexpected}")

# Prune the input layer if input_prune_map is provided
Expand Down
Loading