Skip to content

Commit 3fb93fe

Browse files
committed
Update on "Add new export LLM config"
Differential Revision: [D75263991](https://our.internmc.facebook.com/intern/diff/D75263991) [ghstack-poisoned]
2 parents: a02693f + 33b3830 · commit 3fb93fe

File tree

1 file changed: +12 additions, −2 deletions

examples/models/llama/config/llm_config.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,9 @@ def __post_init__(self):
331331
self._validate_qmode()
332332

333333
def _validate_qmode(self) -> None:
334+
if not self.qmode:
335+
return
336+
334337
if self.qmode in self.QMODE_OPTIONS:
335338
return
336339

@@ -466,13 +469,20 @@ class LlmConfig:
466469
backend: BackendConfig = field(default_factory=BackendConfig)
467470

468471
def __post_init__(self):
469-
# If we are using Ao's low bit quantization kernels for ARM,
470-
# we do not want to also be delegating to a CPU backend (XNNPack).
472+
self._validate_low_bit_no_xnnpack()
473+
474+
def _validate_low_bit(self):
475+
if not self.quantization.qmode:
476+
return
477+
471478
using_lowbit_ops = False
472479
for pattern in self.quantization.AO_QUANT_PATTERNS:
473480
matches = re.findall(pattern, self.quantization.qmode)
474481
if len(matches) == 1:
475482
using_lowbit_ops = True
483+
484+
# If we are using Ao's low bit quantization kernels for ARM,
485+
# we do not want to also be delegating to a CPU backend (XNNPack).
476486
if using_lowbit_ops and self.backend.xnnpack.enabled:
477487
raise ValueError(
478488
"Cannot use low-bit Ao ops (from qmode=torchao:...) while also delegating to XNNPack."

0 commit comments

Comments (0)