Skip to content

Preprocess C++ #4987

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 118 additions & 0 deletions examples/models/flamingo/preprocess/preprocess.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "executorch/examples/models/flamingo/preprocess/preprocess.h"

#include <algorithm>
#include <cassert>

// Collects every positive divisor of `n`, smallest first (1 and `n` itself
// are included). Yields an empty vector when `n` is less than 1.
std::vector<int> _get_factors(int n) {
  std::vector<int> divisors;
  int candidate = 1;
  while (candidate <= n) {
    const bool divides_evenly = (n % candidate) == 0;
    if (divides_evenly) {
      divisors.emplace_back(candidate);
    }
    ++candidate;
  }
  return divisors;
}

// Enumerates every {height, width} canvas, in multiples of `tile_size`, that
// can be built from at most `max_num_tiles` tiles.
//
// Tile counts are visited from `max_num_tiles` down to 1. For each count, one
// entry is produced per divisor of that count (divisors ascending): the
// divisor gives the number of tile rows and the quotient the number of tile
// columns. Returns an empty vector when `max_num_tiles` is less than 1.
std::vector<std::vector<int>> find_supported_resolutions(
    int max_num_tiles,
    int tile_size) {
  std::vector<std::vector<int>> resolutions;
  int num_tiles = max_num_tiles;
  while (num_tiles > 0) {
    for (const int rows : _get_factors(num_tiles)) {
      const int cols = num_tiles / rows;
      resolutions.push_back({rows * tile_size, cols * tile_size});
    }
    --num_tiles;
  }
  return resolutions;
}

// Picks, from `possible_resolutions`, the canvas an image should be resized
// into without distortion.
//
// Parameters:
//   image_size            {height, width} of the image (must have 2 entries).
//   possible_resolutions  candidate canvases, each {height, width}.
//   resize_to_max_canvas  when true, prefer the largest upscaling factor;
//                         when false, prefer the smallest upscaling factor.
//
// For each candidate the limiting scale is
//   min(canvas_h / image_h, canvas_w / image_w).
// Candidates are ranked as follows, independent of their order in the input:
//   1. Any candidate that allows upscaling (scale >= 1) beats any candidate
//      that requires downscaling (scale < 1).
//   2. Among upscaling candidates: largest scale if `resize_to_max_canvas`,
//      otherwise smallest scale.
//   3. Among downscaling candidates: largest scale (least shrinking).
//   4. On equal scale, the candidate with the smaller area wins (less
//      padding); if the areas are equal too, the earliest candidate is kept.
//
// Returns the winning {height, width}, or an empty vector when
// `possible_resolutions` is empty.
std::vector<int> get_canvas_best_fit(
    std::vector<int> image_size,
    std::vector<std::vector<int>> possible_resolutions,
    bool resize_to_max_canvas) {
  assert(image_size.size() == 2);
  const int image_h = image_size[0];
  const int image_w = image_size[1];

  // Points into `possible_resolutions`, which lives until this function
  // returns; nullptr means nothing has been selected yet.
  const std::vector<int>* best = nullptr;
  float best_scale = 0.0f;
  int best_area = 0;

  for (const auto& resolution : possible_resolutions) {
    assert(resolution.size() == 2);
    const float scale_h = resolution[0] / static_cast<float>(image_h);
    const float scale_w = resolution[1] / static_cast<float>(image_w);

    // Get limiting side scaling -> no distortion
    const float scale = scale_h < scale_w ? scale_h : scale_w;
    // @lint-ignore CLANGTIDY facebook-hte-ParameterUncheckedArrayBounds
    const int area = resolution[0] * resolution[1];

    bool is_better = false;
    if (best == nullptr) {
      is_better = true;
    } else {
      const bool upscales = scale >= 1.0f;
      const bool best_upscales = best_scale >= 1.0f;
      if (upscales != best_upscales) {
        // Upscaling options always take priority over downscaling ones, even
        // if a downscaling option was encountered first.
        is_better = upscales;
      } else if (scale != best_scale) {
        // Downscaling: least shrinking wins. Upscaling: direction depends on
        // `resize_to_max_canvas`.
        const bool prefer_larger = !upscales || resize_to_max_canvas;
        is_better = prefer_larger ? (scale > best_scale) : (scale < best_scale);
      } else {
        // Same scale: take the smaller canvas to reduce padding.
        is_better = area < best_area;
      }
    }

    if (is_better) {
      best = &resolution;
      best_scale = scale;
      best_area = area;
    }
  }

  if (best == nullptr) {
    return {};
  }
  return *best;
}

// Computes the {height, width} an image would have after being resized to
// fit inside a bounding box while keeping its aspect ratio.
//
// Parameters:
//   image_size   {height, width} of the image (must have 2 entries).
//   target_size  {height, width} of the bounding box (must have 2 entries).
//   max_size     when greater than 0, each bounding-box side is first
//                replaced by min(max(image side, max_size), target side);
//                values <= 0 leave `target_size` unchanged.
//
// Each returned side is the image side scaled by the other axis' ratio,
// truncated to int, and clamped to the corresponding bounding-box side.
std::vector<int> get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size) {
  assert(image_size.size() == 2);
  assert(target_size.size() == 2);

  const int image_h = image_size[0];
  const int image_w = image_size[1];

  int bound_h = target_size[0];
  int bound_w = target_size[1];
  const bool has_size_cap = max_size > 0;
  if (has_size_cap) {
    bound_h = std::min(std::max(image_h, max_size), bound_h);
    bound_w = std::min(std::max(image_w, max_size), bound_w);
  }

  // Scale each side by the ratio of the *other* axis; the clamp below then
  // keeps whichever axis is the limiting one.
  const float width_ratio = bound_w / static_cast<float>(image_w);
  const float height_ratio = bound_h / static_cast<float>(image_h);
  const int scaled_h = static_cast<int>(image_h * width_ratio);
  const int scaled_w = static_cast<int>(image_w * height_ratio);

  return {std::min(scaled_h, bound_h), std::min(scaled_w, bound_w)};
}
41 changes: 41 additions & 0 deletions examples/models/flamingo/preprocess/preprocess.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <vector>

// C++ implementation of the python functions in torchtune:
// https://github.com/pytorch/torchtune/tree/main/torchtune/modules/transforms/vision_utils

// Calculates all positive factors of `n`, in ascending order (1 and `n`
// included). The result is empty when `n` is less than 1.
std::vector<int> _get_factors(int n);

// Computes all combinations of resolutions, multiple of tile_size,
// that contain up to max_num_tiles. Useful for when dividing an image into
// tiles. For example, if we want at most 2 tiles per image, then we can
// support the following resolutions: (1x2, 2x1, 1x1) * tile_size.
//
// Returns a vector of {height, width} pairs. Entries are ordered by
// descending tile count (max_num_tiles first, 1 last); within one tile count
// the height ascends.
std::vector<std::vector<int>> find_supported_resolutions(
int max_num_tiles,
int tile_size);

// Determines the best canvas possible from a list of possible resolutions to
// resize an image to, without distortion.
//
// image_size is {height, width}; each entry of possible_resolutions is
// {height, width}. For every resolution the limiting scale is
// min(resolution_h / image_h, resolution_w / image_w).
//
// resize_to_max_canvas chooses which upscaling option (scale >= 1) is
// favoured: the largest scale when true, the smallest when false. When no
// resolution allows upscaling, the least downscaling (largest scale below 1)
// is favoured. Among upscaling options with the same scale, the one with the
// smallest area is favoured to reduce padding.
//
// Returns the chosen {height, width}; the result is empty when
// possible_resolutions is empty.
std::vector<int> get_canvas_best_fit(
std::vector<int> image_size,
std::vector<std::vector<int>> possible_resolutions,
bool resize_to_max_canvas);

// Calculates the size of an image, if it was resized to be inscribed within
// the target size (passed here as canvas_size). It is upscaled or downscaled
// such that one side is equal to the target size, and the other side is less
// than or equal to the target size.
//
// image_size and canvas_size are {height, width}; the result is
// {height, width} as well.
//
// max_size: when greater than 0, each target side is first replaced by
// min(max(image side, max_size), canvas side); values <= 0 leave canvas_size
// unchanged.
std::vector<int> get_inscribed_size(
std::vector<int> image_size,
std::vector<int> canvas_size,
int max_size);
113 changes: 113 additions & 0 deletions examples/models/flamingo/preprocess/preprocess_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/examples/models/flamingo/preprocess/preprocess.h>
#include <gtest/gtest.h>

using namespace ::testing;

// Mirror the torchtune python testing:
// https://github.com/pytorch/torchtune/tree/main/tests/torchtune/modules/transforms

void test_find_supported_resolutions(
int max_num_tiles,
int tile_size,
std::vector<std::vector<int>> expected_resolutions) {
std::vector<std::vector<int>> resolutions =
find_supported_resolutions(max_num_tiles, tile_size);

EXPECT_EQ(resolutions.size(), expected_resolutions.size());

for (int i = 0; i < resolutions.size(); i++) {
EXPECT_EQ(resolutions[i].size(), expected_resolutions[i].size());
EXPECT_EQ(resolutions[i][0], expected_resolutions[i][0]); // height
EXPECT_EQ(resolutions[i][1], expected_resolutions[i][1]); // width
}
}

// Checks the generated resolution lists for tile budgets of 1, 2, 3 and 4.
// Each expected list is ordered by descending tile count, then by ascending
// height within a tile count.
TEST(PreprocessTest, TestFindSupportedResolution) {
  using Resolutions = std::vector<std::vector<int>>;

  const Resolutions one_tile = {{224, 224}};
  test_find_supported_resolutions(1, 224, one_tile);

  const Resolutions two_tiles = {{100, 200}, {200, 100}, {100, 100}};
  test_find_supported_resolutions(2, 100, two_tiles);

  const Resolutions three_tiles = {
      {50, 150}, {150, 50}, {50, 100}, {100, 50}, {50, 50}};
  test_find_supported_resolutions(3, 50, three_tiles);

  const Resolutions four_tiles = {
      {300, 1200},
      {600, 600},
      {1200, 300},
      {300, 900},
      {900, 300},
      {300, 600},
      {600, 300},
      {300, 300},
  };
  test_find_supported_resolutions(4, 300, four_tiles);
}

void test_get_canvas_best_fit(
std::vector<int> image_size,
std::vector<std::vector<int>> possible_resolutions,
bool resize_to_max_canvas,
std::vector<int> expected_best_resolution) {
std::vector<int> best_resolution = get_canvas_best_fit(
image_size, possible_resolutions, resize_to_max_canvas);
EXPECT_EQ(best_resolution[0], expected_best_resolution[0]); // height
EXPECT_EQ(best_resolution[1], expected_best_resolution[1]); // width
}

// Exercises get_canvas_best_fit against the 224-pixel tile grid for four
// image sizes, covering both values of resize_to_max_canvas.
TEST(PreprocessTest, TestGetCanvasBestFit_200x300_F) {
  const bool kSmallestUpscale = false; // resize_to_max_canvas = false
  const bool kLargestUpscale = true; // resize_to_max_canvas = true

  const std::vector<std::vector<int>> canvases = {
      {224, 896},
      {448, 448},
      {224, 224},
      {896, 224},
      {224, 672},
      {672, 224},
      {224, 448},
      {448, 224},
  };

  test_get_canvas_best_fit({200, 300}, canvases, kSmallestUpscale, {224, 448});
  test_get_canvas_best_fit({200, 500}, canvases, kLargestUpscale, {224, 672});
  test_get_canvas_best_fit({200, 200}, canvases, kSmallestUpscale, {224, 224});
  test_get_canvas_best_fit({200, 100}, canvases, kLargestUpscale, {448, 224});
}

// Calls get_inscribed_size(image_size, target_size, max_size) and compares
// the returned {height, width} with `expected_target_size`.
void test_get_inscribed_size(
    std::vector<int> image_size,
    std::vector<int> target_size,
    int max_size,
    std::vector<int> expected_target_size) {
  const auto inscribed = get_inscribed_size(image_size, target_size, max_size);
  const int actual_height = inscribed[0];
  const int actual_width = inscribed[1];
  EXPECT_EQ(actual_height, expected_target_size[0]); // height
  EXPECT_EQ(actual_width, expected_target_size[1]); // width
}
// Covers get_inscribed_size with and without a max_size cap, for both
// upscaling and downscaling.
TEST(PreprocessTest, GetInscribedSize) {
  const int kNoMaxSize = -1; // non-positive max_size disables the cap
  const std::vector<int> large_target = {1000, 1200};

  // max_size caps the target before the aspect-preserving resize.
  test_get_inscribed_size({200, 100}, large_target, 600, {600, 300});
  test_get_inscribed_size({2000, 200}, large_target, 2000, {1000, 100});

  // No cap: upscale into the target, then downscale into a smaller target.
  test_get_inscribed_size({400, 200}, large_target, kNoMaxSize, {1000, 500});
  test_get_inscribed_size({1000, 500}, {400, 300}, kNoMaxSize, {400, 200});
}
20 changes: 20 additions & 0 deletions examples/models/flamingo/preprocess/targets.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

def define_common_targets():
    """Defines targets that should be shared between fbcode and xplat.
    The directory containing this targets.bzl file should also contain both
    TARGETS and BUCK files that call this function.
    """

    # Library target built from preprocess.cpp; preprocess.h is exported to
    # dependents.
    runtime.cxx_library(
        name = "preprocess",
        srcs = ["preprocess.cpp"],
        exported_headers = ["preprocess.h"],
    )

    # Unit tests for the library above, built from preprocess_test.cpp.
    runtime.cxx_test(
        name = "preprocess_test",
        srcs = ["preprocess_test.cpp"],
        deps = [":preprocess"],
    )
Loading