3
3
#
4
4
# This source code is licensed under the BSD-style license found in the
5
5
# LICENSE file in the root directory of this source tree.
6
-
7
6
import unittest
8
7
9
- from dataclasses import dataclass
10
- from typing import List , Optional , Tuple
8
+ from typing import Any , Dict , List , Tuple
11
9
12
10
import numpy as np
13
11
import PIL
14
12
import torch
15
13
14
+ # Import these first. Otherwise, the custom ops are not registered.
16
15
from executorch .extension .pybindings import portable_lib # noqa # usort: skip
17
- from executorch .extension .llm .custom_ops import sdpa_with_kv_cache # noqa # usort: skip
18
- from executorch . examples . models . llama3_2_vision . preprocess . export_preprocess_lib import (
19
- export_preprocess ,
20
- get_example_inputs ,
21
- lower_to_executorch_preprocess ,
16
+ from executorch .extension .llm .custom_ops import op_tile_crop_aot # noqa # usort: skip
17
+
18
+ from executorch . examples . models . llama3_2_vision . preprocess . model import (
19
+ CLIPImageTransformModel ,
20
+ PreprocessConfig ,
22
21
)
22
+
23
+ from executorch .exir import EdgeCompileConfig , to_edge
24
+
23
25
from executorch .extension .pybindings .portable_lib import (
24
26
_load_for_executorch_from_buffer ,
25
27
)
26
28
27
- from parameterized import parameterized
28
29
from PIL import Image
29
30
30
- from torchtune .models .clip .inference ._transform import (
31
- _CLIPImageTransform ,
32
- CLIPImageTransform ,
33
- )
31
+ from torchtune .models .clip .inference ._transform import CLIPImageTransform
34
32
35
33
from torchtune .modules .transforms .vision_utils .get_canvas_best_fit import (
36
34
find_supported_resolutions ,
43
41
from torchvision .transforms .v2 import functional as F
44
42
45
43
46
- @dataclass
47
- class PreprocessConfig :
48
- image_mean : Optional [List [float ]] = None
49
- image_std : Optional [List [float ]] = None
50
- resize_to_max_canvas : bool = True
51
- resample : str = "bilinear"
52
- antialias : bool = False
53
- tile_size : int = 224
54
- max_num_tiles : int = 4
55
- possible_resolutions = None
56
-
57
-
58
44
class TestImageTransform (unittest .TestCase ):
59
45
"""
60
46
This unittest checks that the exported image transform model produces the
@@ -66,6 +52,58 @@ class TestImageTransform(unittest.TestCase):
66
52
https://github.com/pytorch/torchtune/blob/main/torchtune/models/clip/inference/_transforms.py#L26
67
53
"""
68
54
55
+ @staticmethod
56
+ def initialize_models (resize_to_max_canvas : bool ) -> Dict [str , Any ]:
57
+ config = PreprocessConfig (resize_to_max_canvas = resize_to_max_canvas )
58
+
59
+ reference_model = CLIPImageTransform (
60
+ image_mean = config .image_mean ,
61
+ image_std = config .image_std ,
62
+ resample = config .resample ,
63
+ antialias = config .antialias ,
64
+ tile_size = config .tile_size ,
65
+ max_num_tiles = config .max_num_tiles ,
66
+ resize_to_max_canvas = config .resize_to_max_canvas ,
67
+ possible_resolutions = None ,
68
+ )
69
+
70
+ model = CLIPImageTransformModel (config )
71
+
72
+ exported_model = torch .export .export (
73
+ model .get_eager_model (),
74
+ model .get_example_inputs (),
75
+ dynamic_shapes = model .get_dynamic_shapes (),
76
+ strict = False ,
77
+ )
78
+
79
+ # aoti_path = torch._inductor.aot_compile(
80
+ # exported_model.module(),
81
+ # model.get_example_inputs(),
82
+ # )
83
+
84
+ edge_program = to_edge (
85
+ exported_model , compile_config = EdgeCompileConfig (_check_ir_validity = False )
86
+ )
87
+ executorch_model = edge_program .to_executorch ()
88
+
89
+ return {
90
+ "config" : config ,
91
+ "reference_model" : reference_model ,
92
+ "model" : model ,
93
+ "exported_model" : exported_model ,
94
+ # "aoti_path": aoti_path,
95
+ "executorch_model" : executorch_model ,
96
+ }
97
+
98
+ @classmethod
99
+ def setUpClass (cls ):
100
+ cls .models_no_resize = TestImageTransform .initialize_models (
101
+ resize_to_max_canvas = False
102
+ )
103
+ cls .models_resize = TestImageTransform .initialize_models (
104
+ resize_to_max_canvas = True
105
+ )
106
+
69
107
def setUp (self ):
70
108
np .random .seed (0 )
71
109
@@ -121,51 +159,7 @@ def prepare_inputs(
121
159
122
160
return image_tensor , inscribed_size , best_resolution
123
161
124
- # This test setup mirrors the one in torchtune:
125
- # https://github.com/pytorch/torchtune/blob/main/tests/torchtune/models/clip/test_clip_image_transform.py
126
- # The values are slightly different, as torchtune uses antialias=True,
127
- # and this test uses antialias=False, which is exportable (has a portable kernel).
128
- @parameterized .expand (
129
- [
130
- (
131
- (100 , 400 , 3 ), # image_size
132
- torch .Size ([2 , 3 , 224 , 224 ]), # expected shape
133
- False , # resize_to_max_canvas
134
- [0.2230 , 0.1763 ], # expected_tile_means
135
- [1.0 , 1.0 ], # expected_tile_max
136
- [0.0 , 0.0 ], # expected_tile_min
137
- [1 , 2 ], # expected_aspect_ratio
138
- ),
139
- (
140
- (1000 , 300 , 3 ), # image_size
141
- torch .Size ([4 , 3 , 224 , 224 ]), # expected shape
142
- True , # resize_to_max_canvas
143
- [0.5005 , 0.4992 , 0.5004 , 0.1651 ], # expected_tile_means
144
- [0.9976 , 0.9940 , 0.9936 , 0.9906 ], # expected_tile_max
145
- [0.0037 , 0.0047 , 0.0039 , 0.0 ], # expected_tile_min
146
- [4 , 1 ], # expected_aspect_ratio
147
- ),
148
- (
149
- (200 , 200 , 3 ), # image_size
150
- torch .Size ([4 , 3 , 224 , 224 ]), # expected shape
151
- True , # resize_to_max_canvas
152
- [0.5012 , 0.5020 , 0.5010 , 0.4991 ], # expected_tile_means
153
- [0.9921 , 0.9925 , 0.9969 , 0.9908 ], # expected_tile_max
154
- [0.0056 , 0.0069 , 0.0059 , 0.0032 ], # expected_tile_min
155
- [2 , 2 ], # expected_aspect_ratio
156
- ),
157
- (
158
- (600 , 200 , 3 ), # image_size
159
- torch .Size ([3 , 3 , 224 , 224 ]), # expected shape
160
- False , # resize_to_max_canvas
161
- [0.4472 , 0.4468 , 0.3031 ], # expected_tile_means
162
- [1.0 , 1.0 , 1.0 ], # expected_tile_max
163
- [0.0 , 0.0 , 0.0 ], # expected_tile_min
164
- [3 , 1 ], # expected_aspect_ratio
165
- ),
166
- ]
167
- )
168
- def test_preprocess (
162
+ def run_preprocess (
169
163
self ,
170
164
image_size : Tuple [int ],
171
165
expected_shape : torch .Size ,
@@ -175,45 +169,7 @@ def test_preprocess(
175
169
expected_tile_min : List [float ],
176
170
expected_ar : List [int ],
177
171
) -> None :
178
- config = PreprocessConfig (resize_to_max_canvas = resize_to_max_canvas )
179
-
180
- reference_model = CLIPImageTransform (
181
- image_mean = config .image_mean ,
182
- image_std = config .image_std ,
183
- resize_to_max_canvas = config .resize_to_max_canvas ,
184
- resample = config .resample ,
185
- antialias = config .antialias ,
186
- tile_size = config .tile_size ,
187
- max_num_tiles = config .max_num_tiles ,
188
- possible_resolutions = None ,
189
- )
190
-
191
- eager_model = _CLIPImageTransform (
192
- image_mean = config .image_mean ,
193
- image_std = config .image_std ,
194
- resample = config .resample ,
195
- antialias = config .antialias ,
196
- tile_size = config .tile_size ,
197
- max_num_tiles = config .max_num_tiles ,
198
- )
199
-
200
- exported_model = export_preprocess (
201
- image_mean = config .image_mean ,
202
- image_std = config .image_std ,
203
- resample = config .resample ,
204
- antialias = config .antialias ,
205
- tile_size = config .tile_size ,
206
- max_num_tiles = config .max_num_tiles ,
207
- )
208
-
209
- executorch_model = lower_to_executorch_preprocess (exported_model )
210
- executorch_module = _load_for_executorch_from_buffer (executorch_model .buffer )
211
-
212
- aoti_path = torch ._inductor .aot_compile (
213
- exported_model .module (),
214
- get_example_inputs (),
215
- )
216
-
172
+ models = self .models_resize if resize_to_max_canvas else self .models_no_resize
217
173
# Prepare image input.
218
174
image = (
219
175
np .random .randint (0 , 256 , np .prod (image_size ))
@@ -223,6 +179,7 @@ def test_preprocess(
223
179
image = PIL .Image .fromarray (image )
224
180
225
181
# Run reference model.
182
+ reference_model = models ["reference_model" ]
226
183
reference_output = reference_model (image = image )
227
184
reference_image = reference_output ["image" ]
228
185
reference_ar = reference_output ["aspect_ratio" ].tolist ()
@@ -249,10 +206,11 @@ def test_preprocess(
249
206
# Pre-work for eager and exported models. The reference model performs these
250
207
# calculations and passes the result to _CLIPImageTransform, the exportable model.
251
208
image_tensor , inscribed_size , best_resolution = self .prepare_inputs (
252
- image = image , config = config
209
+ image = image , config = models [ " config" ]
253
210
)
254
211
255
212
# Run eager model and check it matches reference model.
213
+ eager_model = models ["model" ].get_eager_model ()
256
214
eager_image , eager_ar = eager_model (
257
215
image_tensor , inscribed_size , best_resolution
258
216
)
@@ -261,6 +219,7 @@ def test_preprocess(
261
219
self .assertEqual (reference_ar , eager_ar )
262
220
263
221
# Run exported model and check it matches reference model.
222
+ exported_model = models ["exported_model" ]
264
223
exported_image , exported_ar = exported_model .module ()(
265
224
image_tensor , inscribed_size , best_resolution
266
225
)
@@ -269,14 +228,65 @@ def test_preprocess(
269
228
self .assertEqual (reference_ar , exported_ar )
270
229
271
230
# Run executorch model and check it matches reference model.
231
+ executorch_model = models ["executorch_model" ]
232
+ executorch_module = _load_for_executorch_from_buffer (executorch_model .buffer )
272
233
et_image , et_ar = executorch_module .forward (
273
234
(image_tensor , inscribed_size , best_resolution )
274
235
)
275
236
self .assertTrue (torch .allclose (reference_image , et_image ))
276
237
self .assertEqual (reference_ar , et_ar .tolist ())
277
238
278
239
# Run aoti model and check it matches reference model.
279
- aoti_model = torch ._export .aot_load (aoti_path , "cpu" )
280
- aoti_image , aoti_ar = aoti_model (image_tensor , inscribed_size , best_resolution )
281
- self .assertTrue (torch .allclose (reference_image , aoti_image ))
282
- self .assertEqual (reference_ar , aoti_ar .tolist ())
240
+ # aoti_path = models["aoti_path"]
241
+ # aoti_model = torch._export.aot_load(aoti_path, "cpu")
242
+ # aoti_image, aoti_ar = aoti_model(image_tensor, inscribed_size, best_resolution)
243
+ # self.assertTrue(torch.allclose(reference_image, aoti_image))
244
+ # self.assertEqual(reference_ar, aoti_ar.tolist())
245
+
246
+ # This test setup mirrors the one in torchtune:
247
+ # https://github.com/pytorch/torchtune/blob/main/tests/torchtune/models/clip/test_clip_image_transform.py
248
+ # The values are slightly different, as torchtune uses antialias=True,
249
+ # and this test uses antialias=False, which is exportable (has a portable kernel).
250
+ def test_preprocess1 (self ):
251
+ self .run_preprocess (
252
+ (100 , 400 , 3 ), # image_size
253
+ torch .Size ([2 , 3 , 224 , 224 ]), # expected shape
254
+ False , # resize_to_max_canvas
255
+ [0.2230 , 0.1763 ], # expected_tile_means
256
+ [1.0 , 1.0 ], # expected_tile_max
257
+ [0.0 , 0.0 ], # expected_tile_min
258
+ [1 , 2 ], # expected_aspect_ratio
259
+ )
260
+
261
+ def test_preprocess2 (self ):
262
+ self .run_preprocess (
263
+ (1000 , 300 , 3 ), # image_size
264
+ torch .Size ([4 , 3 , 224 , 224 ]), # expected shape
265
+ True , # resize_to_max_canvas
266
+ [0.5005 , 0.4992 , 0.5004 , 0.1651 ], # expected_tile_means
267
+ [0.9976 , 0.9940 , 0.9936 , 0.9906 ], # expected_tile_max
268
+ [0.0037 , 0.0047 , 0.0039 , 0.0 ], # expected_tile_min
269
+ [4 , 1 ], # expected_aspect_ratio
270
+ )
271
+
272
+ def test_preprocess3 (self ):
273
+ self .run_preprocess (
274
+ (200 , 200 , 3 ), # image_size
275
+ torch .Size ([4 , 3 , 224 , 224 ]), # expected shape
276
+ True , # resize_to_max_canvas
277
+ [0.5012 , 0.5020 , 0.5010 , 0.4991 ], # expected_tile_means
278
+ [0.9921 , 0.9925 , 0.9969 , 0.9908 ], # expected_tile_max
279
+ [0.0056 , 0.0069 , 0.0059 , 0.0032 ], # expected_tile_min
280
+ [2 , 2 ], # expected_aspect_ratio
281
+ )
282
+
283
+ def test_preprocess4 (self ):
284
+ self .run_preprocess (
285
+ (600 , 200 , 3 ), # image_size
286
+ torch .Size ([3 , 3 , 224 , 224 ]), # expected shape
287
+ False , # resize_to_max_canvas
288
+ [0.4472 , 0.4468 , 0.3031 ], # expected_tile_means
289
+ [1.0 , 1.0 , 1.0 ], # expected_tile_max
290
+ [0.0 , 0.0 , 0.0 ], # expected_tile_min
291
+ [3 , 1 ], # expected_aspect_ratio
292
+ )
0 commit comments