Skip to content

Commit 1263964

Browse files
authored
Preprocess C++
Differential Revision: D61833480 Pull Request resolved: #4987
1 parent 12039af commit 1263964

File tree

4 files changed

+292
-0
lines changed

4 files changed

+292
-0
lines changed
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include "executorch/examples/models/flamingo/preprocess/preprocess.h"
10+
11+
#include <algorithm>
12+
#include <cassert>
13+
14+
// Returns every positive divisor of `n` in ascending order. A non-positive
// `n` yields an empty vector.
std::vector<int> _get_factors(int n) {
  std::vector<int> factors;
  if (n <= 0) {
    return factors;
  }

  // Divisors come in pairs (i, n / i) with i <= sqrt(n), so only the small
  // half has to be searched. Writing the bound as `i <= n / i` avoids the
  // i * i overflow, and the loop counter never gets near INT_MAX the way a
  // full 1..n scan does when n == INT_MAX.
  std::vector<int> large_factors;
  for (int i = 1; i <= n / i; ++i) {
    if (n % i != 0) {
      continue;
    }
    factors.push_back(i);
    const int paired = n / i;
    if (paired != i) {
      large_factors.push_back(paired);
    }
  }

  // The paired divisors were collected in descending order; append them
  // reversed so the overall result stays sorted.
  factors.insert(factors.end(), large_factors.rbegin(), large_factors.rend());
  return factors;
}
23+
24+
std::vector<std::vector<int>> find_supported_resolutions(
25+
int max_num_tiles,
26+
int tile_size) {
27+
std::vector<std::vector<int>> supported_resolutions;
28+
for (int _tile_size = max_num_tiles; _tile_size > 0; _tile_size--) {
29+
auto factors = _get_factors(_tile_size);
30+
for (int i = 0; i < factors.size(); i++) {
31+
int height = factors[i];
32+
int width = _tile_size / factors[i];
33+
supported_resolutions.push_back({height * tile_size, width * tile_size});
34+
}
35+
}
36+
return supported_resolutions;
37+
}
38+
39+
// Picks, from `possible_resolutions`, the canvas that an image of
// `image_size` ({height, width}) should be resized into without distortion.
//
// Every canvas is scored by the scale of its limiting side, i.e.
// min(canvas_h / image_h, canvas_w / image_w). Selection rules:
//   * If any canvas allows upscaling (scale >= 1), only those compete: the
//     largest scale wins when `resize_to_max_canvas` is true, the smallest
//     otherwise.
//   * If none does, the canvas with the least downscaling (largest scale)
//     wins.
//   * Canvases with an identical scale are tie-broken by the smaller area,
//     which reduces padding.
// The result does not depend on the order of `possible_resolutions`, except
// between entries that tie on both scale and area (the first one is kept).
//
// Returns the chosen {height, width}, or an empty vector when
// `possible_resolutions` is empty.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas) {
  assert(image_size.size() == 2);
  const int image_h = image_size[0];
  const int image_w = image_size[1];

  // Points into `possible_resolutions`, which is owned by this call and not
  // modified, so the pointer stays valid until return.
  const std::vector<int>* best_resolution = nullptr;
  float best_scale = 0.0f;
  int best_area = 0;

  for (const std::vector<int>& resolution : possible_resolutions) {
    assert(resolution.size() == 2);
    const float scale_h = resolution[0] / static_cast<float>(image_h);
    const float scale_w = resolution[1] / static_cast<float>(image_w);

    // Get limiting side scaling -> no distortion
    const float scale = std::min(scale_h, scale_w);
    // @lint-ignore CLANGTIDY facebook-hte-ParameterUncheckedArrayBounds
    const int area = resolution[0] * resolution[1];

    bool is_better = false;
    if (best_resolution == nullptr) {
      // First canvas seen: anything beats nothing.
      is_better = true;
    } else if (scale == best_scale) {
      // Same scale: prefer the smaller canvas to reduce padding.
      is_better = area < best_area;
    } else if (scale >= 1.0f && best_scale >= 1.0f) {
      // Both are upscaling options; the flag decides the direction.
      is_better = resize_to_max_canvas ? scale > best_scale
                                       : scale < best_scale;
    } else {
      // At least one of the two downscales. The larger scale wins: an
      // upscaling option always beats a downscaling one, and between two
      // downscaling options the one that shrinks the image least is kept.
      is_better = scale > best_scale;
    }

    if (is_better) {
      best_resolution = &resolution;
      best_scale = scale;
      best_area = area;
    }
  }

  if (best_resolution == nullptr) {
    return {};
  }
  return *best_resolution;
}
94+
95+
// Computes the {height, width} an image of `image_size` ends up with when it
// is resized, keeping its aspect ratio, to fit inside `target_size`.
//
// When `max_size` is positive, each target side is first replaced by
// min(max(image side, max_size), target side); a non-positive `max_size`
// leaves `target_size` as given.
//
// Both entries of `image_size` must be positive.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size) {
  assert(image_size.size() == 2);
  assert(target_size.size() == 2);

  const int image_height = image_size[0];
  const int image_width = image_size[1];
  // Both sides are used as divisors below; a zero side would yield an
  // infinite scale, and converting that to int is undefined behavior.
  assert(image_height > 0 && image_width > 0);

  int target_height = target_size[0];
  int target_width = target_size[1];
  if (max_size > 0) {
    target_height = std::min(std::max(image_height, max_size), target_height);
    target_width = std::min(std::max(image_width, max_size), target_width);
  }

  const float scale_h = target_height / static_cast<float>(image_height);
  const float scale_w = target_width / static_cast<float>(image_width);

  // Each side is scaled by the *other* axis' factor and then capped at its
  // target. The side belonging to the smaller (limiting) factor hits the cap,
  // while the other side ends up scaled by that limiting factor.
  const int resize_height =
      std::min(static_cast<int>(image_height * scale_w), target_height);
  const int resize_width =
      std::min(static_cast<int>(image_width * scale_h), target_width);

  return {resize_height, resize_width};
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#pragma once
10+
11+
#include <vector>
12+
13+
// C++ implementation of the python functions in torchtune:
14+
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils
15+
16+
// Calculate all factors of a given number.
17+
std::vector<int> _get_factors(int n);
18+
19+
// Computes all combinations of resolutions, multiple of tile_size,
20+
// that contain up to max_num_tiles. Useful for when dividing an image into
21+
// tiles. For example, if we want at most 2 tiles per image, then we can support
22+
// the following resolutions: (1x1, 1x2, 2x1) * tile_size. Returns a vector of
23+
// tuples of (height, width).
24+
std::vector<std::vector<int>> find_supported_resolutions(
25+
int max_num_tiles,
26+
int tile_size);
27+
28+
// Determines the best canvas possible from a list of possible resolutions to
29+
// resize an image to, without distortion.
30+
std::vector<int> get_canvas_best_fit(
31+
std::vector<int> image_size,
32+
std::vector<std::vector<int>> possible_resolutions,
33+
bool resize_to_max_canvas);
34+
35+
// Calculates the size of an image, if it was resized to be inscribed within the
36+
// target_size. It is upscaled or downscaled such that one side is equal to the
37+
// target_size, and the second side is less than or equal to the target_size.
38+
// `image_size` and `target_size` are {height, width}. A positive `max_size`
// caps each target side at max(image side, max_size) before fitting; pass a
// non-positive value to use `target_size` as given.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size);
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <executorch/examples/models/flamingo/preprocess/preprocess.h>
10+
#include <gtest/gtest.h>
11+
12+
using namespace ::testing;
13+
14+
// Mirror the torchtune python testing:
15+
// https://github.com/pytorch/torchtune/tree/main/tests/torchtune/modules/transforms
16+
17+
void test_find_supported_resolutions(
18+
int max_num_tiles,
19+
int tile_size,
20+
std::vector<std::vector<int>> expected_resolutions) {
21+
std::vector<std::vector<int>> resolutions =
22+
find_supported_resolutions(max_num_tiles, tile_size);
23+
24+
EXPECT_EQ(resolutions.size(), expected_resolutions.size());
25+
26+
for (int i = 0; i < resolutions.size(); i++) {
27+
EXPECT_EQ(resolutions[i].size(), expected_resolutions[i].size());
28+
EXPECT_EQ(resolutions[i][0], expected_resolutions[i][0]); // height
29+
EXPECT_EQ(resolutions[i][1], expected_resolutions[i][1]); // width
30+
}
31+
}
32+
33+
// Exercises find_supported_resolutions for tile budgets of 1 through 4.
TEST(PreprocessTest, TestFindSupportedResolution) {
  struct Case {
    int max_num_tiles;
    int tile_size;
    std::vector<std::vector<int>> expected; // {height, width} entries
  };

  const std::vector<Case> cases = {
      {1, 224, {{224, 224}}},
      {2, 100, {{100, 200}, {200, 100}, {100, 100}}},
      {3, 50, {{50, 150}, {150, 50}, {50, 100}, {100, 50}, {50, 50}}},
      {4,
       300,
       {
           {300, 1200},
           {600, 600},
           {1200, 300},
           {300, 900},
           {900, 300},
           {300, 600},
           {600, 300},
           {300, 300},
       }},
  };

  for (const Case& c : cases) {
    test_find_supported_resolutions(c.max_num_tiles, c.tile_size, c.expected);
  }
}
52+
53+
void test_get_canvas_best_fit(
54+
std::vector<int> image_size,
55+
std::vector<std::vector<int>> possible_resolutions,
56+
bool resize_to_max_canvas,
57+
std::vector<int> expected_best_resolution) {
58+
std::vector<int> best_resolution = get_canvas_best_fit(
59+
image_size, possible_resolutions, resize_to_max_canvas);
60+
EXPECT_EQ(best_resolution[0], expected_best_resolution[0]); // height
61+
EXPECT_EQ(best_resolution[1], expected_best_resolution[1]); // width
62+
}
63+
64+
// Runs get_canvas_best_fit against one shared candidate list for several
// image sizes, with resize_to_max_canvas both off and on.
TEST(PreprocessTest, TestGetCanvasBestFit_200x300_F) {
  const std::vector<std::vector<int>> possible_resolutions = {
      {224, 896},
      {448, 448},
      {224, 224},
      {896, 224},
      {224, 672},
      {672, 224},
      {224, 448},
      {448, 224},
  };

  struct Case {
    std::vector<int> image_size; // {height, width}
    bool resize_to_max_canvas;
    std::vector<int> expected; // {height, width}
  };

  const std::vector<Case> cases = {
      {{200, 300}, false, {224, 448}},
      {{200, 500}, true, {224, 672}},
      {{200, 200}, false, {224, 224}},
      {{200, 100}, true, {448, 224}},
  };

  for (const Case& c : cases) {
    test_get_canvas_best_fit(
        c.image_size, possible_resolutions, c.resize_to_max_canvas, c.expected);
  }
}
97+
98+
void test_get_inscribed_size(
99+
std::vector<int> image_size,
100+
std::vector<int> target_size,
101+
int max_size,
102+
std::vector<int> expected_target_size) {
103+
std::vector<int> result =
104+
get_inscribed_size(image_size, target_size, max_size);
105+
EXPECT_EQ(result[0], expected_target_size[0]); // height
106+
EXPECT_EQ(result[1], expected_target_size[1]); // width
107+
}
108+
// Covers get_inscribed_size with and without a max_size cap, for both
// upscaling and downscaling targets.
TEST(PreprocessTest, GetInscribedSize) {
  struct Case {
    std::vector<int> image_size; // {height, width}
    std::vector<int> target_size; // {height, width}
    int max_size;
    std::vector<int> expected; // {height, width}
  };

  const std::vector<Case> cases = {
      {{200, 100}, {1000, 1200}, 600, {600, 300}},
      {{2000, 200}, {1000, 1200}, 2000, {1000, 100}},
      {{400, 200}, {1000, 1200}, -1, {1000, 500}},
      {{1000, 500}, {400, 300}, -1, {400, 200}},
  };

  for (const Case& c : cases) {
    test_get_inscribed_size(
        c.image_size, c.target_size, c.max_size, c.expected);
  }
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
2+
3+
def define_common_targets():
    """Declares the build targets shared between fbcode and xplat.

    The directory holding this targets.bzl file should also hold the TARGETS
    and BUCK files that call this function.
    """
    library_name = "preprocess"

    # The preprocess library and its public header.
    runtime.cxx_library(
        name = library_name,
        srcs = ["preprocess.cpp"],
        exported_headers = ["preprocess.h"],
    )

    # Unit tests for the library declared above.
    runtime.cxx_test(
        name = "preprocess_test",
        srcs = ["preprocess_test.cpp"],
        deps = [":" + library_name],
    )

0 commit comments

Comments
 (0)