@@ -16,5 +16,10 @@
 from executorch.extension.pybindings import portable_lib  # noqa # usort: skip
 from executorch.extension.llm.custom_ops import sdpa_with_kv_cache  # noqa # usort: skip
+from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
+    export_preprocess,
+    get_example_inputs,
+    lower_to_executorch_preprocess,
+)
 from executorch.extension.pybindings.portable_lib import (
     _load_for_executorch_from_buffer,
 )
@@ -37,12 +42,6 @@
 )
 from torchvision.transforms.v2 import functional as F
 
-from .export_preprocess_lib import (
-    export_preprocess,
-    get_example_inputs,
-    lower_to_executorch_preprocess,
-)
-
 
 @dataclass
 class PreprocessConfig:
@@ -54,7 +53,6 @@ class PreprocessConfig:
     tile_size: int = 224
     max_num_tiles: int = 4
     possible_resolutions = None
-    pad_max_tiles: bool = True
 
 
 class TestImageTransform(unittest.TestCase):
@@ -137,17 +135,6 @@ def prepare_inputs(
                 [1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0],  # expected_tile_min
                 [1, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (100, 400, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.2230, 0.1763, 0.0, 0.0],  # expected_tile_means
-                [1.0, 1.0, 0.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [1, 2],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
             (
                 (1000, 300, 3),  # image_size
@@ -157,7 +144,6 @@ def prepare_inputs(
                 [0.9976, 0.9940, 0.9936, 0.9906],  # expected_tile_max
                 [0.0037, 0.0047, 0.0039, 0.0],  # expected_tile_min
                 [4, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (200, 200, 3),  # image_size
@@ -167,7 +153,6 @@ def prepare_inputs(
                 [0.9921, 0.9925, 0.9969, 0.9908],  # expected_tile_max
                 [0.0056, 0.0069, 0.0059, 0.0032],  # expected_tile_min
                 [2, 2],  # expected_aspect_ratio
-                False,  # pad_max_tiles
             ),
             (
                 (600, 200, 3),  # image_size
@@ -177,17 +162,6 @@ def prepare_inputs(
                 [1.0, 1.0, 1.0],  # expected_tile_max
                 [0.0, 0.0, 0.0],  # expected_tile_min
                 [3, 1],  # expected_aspect_ratio
-                False,  # pad_max_tiles
-            ),
-            (
-                (600, 200, 3),  # image_size
-                torch.Size([4, 3, 224, 224]),  # expected shape
-                False,  # resize_to_max_canvas
-                [0.4472, 0.4468, 0.3031, 0.0],  # expected_tile_means
-                [1.0, 1.0, 1.0, 0.0],  # expected_tile_max
-                [0.0, 0.0, 0.0, 0.0],  # expected_tile_min
-                [3, 1],  # expected_aspect_ratio
-                True,  # pad_max_tiles
             ),
         ]
     )
@@ -200,11 +174,8 @@ def test_preprocess(
         expected_tile_max: List[float],
         expected_tile_min: List[float],
         expected_ar: List[int],
-        pad_max_tiles: bool,
     ) -> None:
-        config = PreprocessConfig(
-            resize_to_max_canvas=resize_to_max_canvas, pad_max_tiles=pad_max_tiles
-        )
+        config = PreprocessConfig(resize_to_max_canvas=resize_to_max_canvas)
 
         reference_model = CLIPImageTransform(
             image_mean=config.image_mean,
@@ -215,7 +186,6 @@ def test_preprocess(
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
             possible_resolutions=None,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         eager_model = _CLIPImageTransform(
@@ -225,7 +195,6 @@ def test_preprocess(
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         exported_model = export_preprocess(
@@ -235,7 +204,6 @@ def test_preprocess(
             antialias=config.antialias,
             tile_size=config.tile_size,
             max_num_tiles=config.max_num_tiles,
-            pad_max_tiles=config.pad_max_tiles,
         )
 
         executorch_model = lower_to_executorch_preprocess(exported_model)
@@ -275,11 +243,8 @@ def test_preprocess(
             self.assertAlmostEqual(tile.min().item(), expected_tile_min[i], delta=1e-4)
 
         # Check num tiles matches the product of the aspect ratio.
-        if pad_max_tiles:
-            self.assertEqual(config.max_num_tiles, reference_image.shape[0])
-        else:
-            expected_num_tiles = reference_ar[0] * reference_ar[1]
-            self.assertEqual(expected_num_tiles, reference_image.shape[0])
+        expected_num_tiles = reference_ar[0] * reference_ar[1]
+        self.assertEqual(expected_num_tiles, reference_image.shape[0])
 
         # Pre-work for eager and exported models. The reference model performs these
         # calculations and passes the result to _CLIPImageTransform, the exportable model.
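
Note on the final hunk: with the `pad_max_tiles` branch removed, the test now expects the tile count to equal the product of the canvas aspect ratio rather than always being padded up to `max_num_tiles`. A minimal sketch of that invariant, built only from values visible in the parameterized cases above (the `expected_tile_shape` helper is ours, not part of the test):

```python
import torch

def expected_tile_shape(ar, tile_size=224, channels=3):
    # An image tiled at aspect ratio (h_tiles, w_tiles) yields
    # h_tiles * w_tiles tiles of shape (channels, tile_size, tile_size).
    return torch.Size([ar[0] * ar[1], channels, tile_size, tile_size])

# The surviving cases: aspect ratio [4, 1] -> 4 tiles, [1, 2] -> 2 tiles.
assert expected_tile_shape([4, 1]) == torch.Size([4, 3, 224, 224])
assert expected_tile_shape([1, 2]) == torch.Size([2, 3, 224, 224])
```

The deleted cases (tile means such as `[0.2230, 0.1763, 0.0, 0.0]`) exercised the old behavior, where all-zero tiles padded the batch out to `max_num_tiles=4`.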
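
Separately, the import hunk at the top replaces the relative `from .export_preprocess_lib import ...` with the full `executorch.examples.models.llama3_2_vision.preprocess` package path. For orientation, a hedged sketch of the export → lower → load flow the test drives; `tile_size=224` and `max_num_tiles=4` mirror the `PreprocessConfig` defaults in the diff, while `antialias=True`, the `.buffer` attribute, and the `forward` call are assumptions rather than confirmed signatures:

```python
from executorch.examples.models.llama3_2_vision.preprocess.export_preprocess_lib import (
    export_preprocess,
    get_example_inputs,
    lower_to_executorch_preprocess,
)
from executorch.extension.pybindings.portable_lib import (
    _load_for_executorch_from_buffer,
)

# Export the eager image-preprocess model, lower it to an ExecuTorch program,
# then round-trip the serialized buffer through the pybindings runtime.
exported = export_preprocess(antialias=True, tile_size=224, max_num_tiles=4)
lowered = lower_to_executorch_preprocess(exported)
module = _load_for_executorch_from_buffer(lowered.buffer)  # assumption: .buffer holds the serialized program
outputs = module.forward(get_example_inputs())  # assumption: example inputs match the exported signature
```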