
Commit 2cca684

Add BUCK files for llava python and C++ libs
Differential Revision: D69278781
Pull Request resolved: #8297
1 parent eadd027 commit 2cca684

4 files changed: +48 -2 lines changed


examples/models/llava/export_llava.py

Lines changed: 1 addition & 0 deletions
@@ -67,6 +67,7 @@ def export(self) -> "LlavaEdgeManager":
             dynamic_shapes=dynamic_shape,
             strict=False,
         )
+        # pyre-ignore: Incompatible attribute type [8]: Attribute `pre_autograd_graph_module` declared in class `LLMEdgeManager` has type `Optional[GraphModule]` but is used as type `Module`.
         self.pre_autograd_graph_module = self.export_program.module()
         return self
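Note: the only functional change in this hunk is the Pyre suppression comment. For readers unfamiliar with the pattern, a minimal toy sketch follows; the class and attribute names are invented for illustration and are not part of this commit. A `# pyre-ignore` comment placed on its own line silences the named error on the statement directly below it.

from typing import Optional


class Holder:
    count: Optional[int] = None

    def update(self, measured: float) -> None:
        # pyre-ignore: Incompatible attribute type [8]: `count` is `Optional[int]` but receives a `float`.
        self.count = measured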

examples/models/llava/image_util.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@
 logging.basicConfig(level=logging.INFO, format=FORMAT)


+# pyre-ignore: Undefined or invalid type [11]: Annotation `Image` is not defined as a type.
 def prepare_image(image: Image, target_h: int, target_w: int) -> torch.Tensor:
     """Read image into a tensor and resize the image so that it fits in
     a target_h x target_w canvas.
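For context on the annotated function, a hedged usage sketch (the 336x336 target size and the file path are illustrative assumptions; in this repo the real values come from the CLIP image processor's crop size):

import torch
from PIL import Image

from executorch.examples.models.llava.image_util import prepare_image

# Hypothetical input image; llava-1.5's CLIP processor is assumed here to use a 336x336 crop.
img = Image.open("photo.jpg")
resized: torch.Tensor = prepare_image(img, target_h=336, target_w=336)
print(resized.shape)  # image read into a tensor and resized to fit the 336x336 canvas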

examples/models/llava/model.py

Lines changed: 22 additions & 2 deletions
@@ -21,6 +21,7 @@
 from executorch.examples.models.llama.source_transformation.sdpa import (
     replace_sdpa_with_custom_op,
 )
+
 from executorch.examples.models.llava.image_util import prepare_image
 from executorch.examples.models.model_base import EagerModelBase
 from PIL import Image
@@ -48,6 +49,7 @@ def __init__(
         self.use_sdpa_with_kv_cache_op = use_sdpa_with_kv_cache_op
         self.model_ = llava_model
         self.image_processor = image_processor
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `config`.
         self.vision_feature_layer = self.model_.config.vision_feature_layer
         self.vision_feature_select_strategy = (
             self.model_.config.vision_feature_select_strategy
@@ -76,6 +78,7 @@ def __init__(
         )

     def _translate_state_dict_for_text_model(self) -> Dict[str, Any]:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
         state_dict = self.model_.language_model.state_dict()
         key_map = {
             # fmt: off
@@ -128,9 +131,11 @@ def get_model(self):
         return self.model_.get_model()

     def embed_tokens(self, tokens: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
         return self.model_.language_model.model.embed_tokens(tokens)

     def encode_images(self, images: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `dtype`.
         images = images.to(dtype=self.model_.dtype)
         if type(images) is list:
             image_features = []
@@ -144,15 +149,19 @@ def encode_images(self, images: torch.Tensor) -> torch.Tensor:
                 image_feature = self._feature_select(image_forward_out).to(image.dtype)
                 image_features.append(image_feature)
         else:
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `vision_tower`.
             image_forward_outs = self.model_.vision_tower(
+                # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `device`.
                 images.to(device=self.model_.device, dtype=self.model_.dtype),
                 output_hidden_states=True,
             )
             image_features = self._feature_select(image_forward_outs).to(images.dtype)
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `multi_modal_projector`.
         image_features = self.model_.multi_modal_projector(image_features)
         return image_features

     def image_preprocess(self, img: torch.Tensor) -> torch.Tensor:
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `crop_size`.
         target_h = self.image_processor.crop_size["height"]
         target_w = self.image_processor.crop_size["width"]
         # pad the image with median rgb value, to make a square
@@ -195,10 +204,15 @@ def image_preprocess(self, img: torch.Tensor) -> torch.Tensor:
         # print(resized.shape)
         # cropped = F.center_crop(img, output_size=[w, w])
         # print(cropped.shape)
+        # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `rescale_factor`.
         scaled = resized * self.image_processor.rescale_factor
         # print(scaled)
         normed = F.normalize(
-            scaled, self.image_processor.image_mean, self.image_processor.image_std
+            scaled,
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `image_mean`.
+            self.image_processor.image_mean,
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_vision_objects.CLIPImageProcessor` has no attribute `image_std`.
+            self.image_processor.image_std,
         )
         # print(normed)
         return normed.unsqueeze(0)
@@ -249,7 +263,9 @@ def prefill_ref(
     ) -> torch.Tensor:
         """Avoiding the torch.where() call to find <image> placeholder and insert image embedding. Taking 3 inputs instead."""
         embeds = self.prefill_embedding(prompt_before_image, images, prompt_after_image)
+        # pyre-ignore: Undefined attribute [16]: Module `transformers` has no attribute `LlamaForCausalLM`.
         return LlamaForCausalLM.forward(
+            # pyre-ignore: Undefined attribute [16]: `transformers.utils.dummy_pt_objects.LlavaForConditionalGeneration` has no attribute `language_model`.
             self.model_.language_model,
             inputs_embeds=embeds,
             return_dict=False,
@@ -268,12 +284,16 @@ class LlavaModel(EagerModelBase):
     def __init__(self, use_sdpa_with_kv_cache_op=True, max_seq_len=768):
         self.use_sdpa_with_kv_cache_op = use_sdpa_with_kv_cache_op
         self.max_seq_len = max_seq_len
-        self.processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
+        self.processor = AutoProcessor.from_pretrained(
+            "llava-hf/llava-1.5-7b-hf",
+            revision="a272c74b2481d8aff3aa6fc2c4bf891fe57334fb",  # Need this for transformers >= 4.44.2
+        )
         self.tokenizer = self.processor.tokenizer
         self.image_processor = self.processor.image_processor
         self.model = LlavaForConditionalGeneration.from_pretrained(
             "llava-hf/llava-1.5-7b-hf",
             device_map="cpu",
+            revision="a272c74b2481d8aff3aa6fc2c4bf891fe57334fb",  # Need this for transformers >= 4.44.2
         )
         self.image = Image.open(
             requests.get(
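The substantive change in this file pins the Hugging Face checkpoint to a fixed revision so that transformers >= 4.44.2 keeps loading the same weights and processor config. A standalone sketch of the same loading pattern, using only the `from_pretrained(..., revision=...)` arguments shown in the diff:

from transformers import AutoProcessor, LlavaForConditionalGeneration

# Revision hash copied from the diff above; it pins llava-hf/llava-1.5-7b-hf to a known-good Hub commit.
REVISION = "a272c74b2481d8aff3aa6fc2c4bf891fe57334fb"

processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf", revision=REVISION)
model = LlavaForConditionalGeneration.from_pretrained(
    "llava-hf/llava-1.5-7b-hf",
    device_map="cpu",
    revision=REVISION,
)
tokenizer = processor.tokenizer  # same accessors LlavaModel.__init__ uses
image_processor = processor.image_processor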

examples/models/llava/targets.bzl

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_oss_build_kwargs", "runtime")
+
+def define_common_targets():
+    runtime.cxx_binary(
+        name = "main",
+        srcs = [
+            "main.cpp",
+        ],
+        compiler_flags = ["-Wno-global-constructors"],
+        preprocessor_flags = [
+            "-DET_USE_THREADPOOL",
+        ],
+        deps = [
+            "//executorch/examples/models/llava/runner:runner",
+            "//executorch/extension/evalue_util:print_evalue",
+            "//executorch/extension/threadpool:cpuinfo_utils",
+            "//executorch/extension/threadpool:threadpool",
+        ],
+        external_deps = [
+            "gflags",
+            "torch-core-cpp",
+        ],
+        **get_oss_build_kwargs()
+    )
