Skip to content

Commit f938acb

Browse files
committed
Patch
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent f7c1459 commit f938acb

File tree

2 files changed

+2
-10
lines changed

2 files changed

+2
-10
lines changed

examples/models/llama2/export_llama_lib.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
Transformer,
3030
)
3131
from executorch.exir.backend.backend_details import CompileSpec
32-
32+
from executorch.exir.passes import *
3333
from executorch.sdk.etrecord import generate_etrecord
3434
from executorch.util.activation_memory_profiler import generate_memory_trace
3535
from sentencepiece import SentencePieceProcessor
@@ -539,7 +539,7 @@ def _prepare_for_llama_export(modelname: str, args) -> LlamaEdgeManager:
539539
bitwidth = int(bitwidth)
540540
transforms.append(
541541
lambda model: EmbeddingQuantHandler(
542-
model, bitwidth=bitwidth, group_size=group_size
542+
model, bitwidth=bitwidth, group_size=group_size, packed=(bitwidth==4),
543543
).quantized_model()
544544
)
545545

examples/models/llama2/quantize.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -436,18 +436,10 @@ def __init__(
436436
@torch.no_grad()
437437
def forward(self, indices: torch.Tensor) -> torch.Tensor:
438438
if not self.packed: # 8bit
439-
<<<<<<< HEAD
440439
return torch.ops.quantized_decomposed.embedding_byte.dtype(
441440
self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype
442441
)
443442
else: # 4bit packed
444443
return torch.ops.quantized_decomposed.embedding_4bit.dtype(
445-
=======
446-
return torch.ops.llama_quantized.DEPRECATED_DO_NOT_USE_embedding_byte.dtype(
447-
self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype
448-
)
449-
else: # 4bit packed
450-
return torch.ops.llama_quantized.embedding_4bit.dtype(
451-
>>>>>>> 6b3b7228c (4b embedding quantizer (#3081))
452444
self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype
453445
)

0 commit comments

Comments
 (0)