|
10 | 10 |
|
#include <bitset>
#include <cstring>
#include <utility>
#include <vector>
13 | 14 |
|
14 | 15 | #include <executorch/runtime/core/exec_aten/exec_aten.h>
|
15 | 16 |
|
@@ -3251,3 +3252,110 @@ TEST(VulkanComputeGraphOpsTest, test_transpose_with_mm) {
|
3251 | 3252 | test_transpose_view_mm(2, 7, 17, 5, storage_type);
|
3252 | 3253 | }
|
3253 | 3254 | }
|
| 3255 | + |
| 3256 | +void test_to_copy() { |
| 3257 | + GraphConfig config; |
| 3258 | + config.set_storage_type_override(utils::kTexture3D); |
| 3259 | + ComputeGraph graph(config); |
| 3260 | + int M = 8; |
| 3261 | + int N = 8; |
| 3262 | + int K = 8; |
| 3263 | + // Build graph |
| 3264 | + IOValueRef in = graph.add_input_tensor( |
| 3265 | + {1, M, N, K}, |
| 3266 | + vkapi::kFloat, |
| 3267 | + utils::GPUMemoryLayout::TENSOR_CHANNELS_PACKED); |
| 3268 | + |
| 3269 | + std::vector<float> data_in = |
| 3270 | + create_random_float_buffer(M * N * K, -1024, 1024); |
| 3271 | + graph.copy_into_staging(in.staging, data_in.data(), data_in.size()); |
| 3272 | + |
| 3273 | + IOValueRef out; |
| 3274 | + out.value = graph.add_tensor( |
| 3275 | + {1, M, N, K}, |
| 3276 | + vkapi::kHalf, |
| 3277 | + utils::GPUMemoryLayout::TENSOR_CHANNELS_PACKED); |
| 3278 | + |
| 3279 | + auto op = VK_GET_OP_FN("aten._to_copy.default"); |
| 3280 | + op(graph, |
| 3281 | + {in.value, |
| 3282 | + graph.add_none(), |
| 3283 | + graph.add_none(), |
| 3284 | + graph.add_none(), |
| 3285 | + graph.add_none(), |
| 3286 | + graph.add_none(), |
| 3287 | + graph.add_none(), |
| 3288 | + out.value}); |
| 3289 | + |
| 3290 | + out.staging = graph.set_output_tensor(out.value); |
| 3291 | + |
| 3292 | + graph.prepare(); |
| 3293 | + graph.encode_prepack(); |
| 3294 | + graph.prepack(); |
| 3295 | + graph.encode_execute(); |
| 3296 | + graph.propagate_resize(); |
| 3297 | + graph.execute(); |
| 3298 | + |
| 3299 | + std::vector<torch::executor::Half> output_data(graph.numel_of(out.value)); |
| 3300 | + graph.copy_from_staging(out.staging, output_data.data(), output_data.size()); |
| 3301 | + |
| 3302 | + EXPECT_EQ(data_in.size(), output_data.size()); |
| 3303 | + |
| 3304 | + float mse_ex = 0.0f; |
| 3305 | + float mse_vk = 0.0f; |
| 3306 | + |
| 3307 | + // check results |
| 3308 | + for (size_t i = 0; i < output_data.size(); ++i) { |
| 3309 | + float input = data_in[i]; |
| 3310 | + torch::executor::Half expected_output = |
| 3311 | + static_cast<torch::executor::Half>(input); |
| 3312 | + uint16_t* expected_bits = reinterpret_cast<uint16_t*>(&expected_output); |
| 3313 | + torch::executor::Half output = output_data[i]; |
| 3314 | + uint16_t* output_bits = reinterpret_cast<uint16_t*>(&output); |
| 3315 | + |
| 3316 | + std::string msg; |
| 3317 | + msg.reserve(64); |
| 3318 | + msg = "input = " + std::to_string(input) + "(0b" |
| 3319 | + + std::bitset<32>(*reinterpret_cast<uint32_t*>(&input)).to_string() |
| 3320 | + + "), expected output = " + std::to_string(expected_output) +"(0b" |
| 3321 | + + std::bitset<16>(*expected_bits).to_string() |
| 3322 | + + "), recieved output = " + std::to_string(output) + "(0b" |
| 3323 | + + std::bitset<16>(*output_bits).to_string() + ")"; |
| 3324 | + |
| 3325 | + std::cout << msg<< std::endl; |
| 3326 | + |
| 3327 | + // Note: Torch executor half "rounds up" when converting to fp16 whereas |
| 3328 | + // most driver implementations of Vulkan's opFConvert() just truncates the |
| 3329 | + // extra bits for performance (rounding introduces conditional). |
| 3330 | + // Example: |
| 3331 | + // INPUT F32 = 25.248 (sign{0b0}, exp{0b10000011}, |
| 3332 | + // mantissa{0b10010011111101111100111}), |
| 3333 | + // TORCH HALF OUTPUT F16 = 25.25 (sign{0b0}, exp{0b10011}, |
| 3334 | + // mantissa{0b1001010000}), |
| 3335 | + // VULKAN OUTPUT F16 = 25.2344 (sign{0b0}, exp{0b10011}, |
| 3336 | + // mantissa{0b1001001111}) |
| 3337 | + // Note: |
| 3338 | + // The vulkan mantissa exactly matches the first 10 |
| 3339 | + // bits of the input 23 bit mantissa. But since the 11th bit is 1, the |
| 3340 | + // torch half output is rounded up (essentially adding a 1). |
| 3341 | + // Vulkan mantissa{0b1001001111} + 1 = Torch half mantissa{0b1001010000} |
| 3342 | + |
| 3343 | + EXPECT_TRUE( |
| 3344 | + (*output_bits == *expected_bits) || |
| 3345 | + /*rounding error*/ ((*output_bits + 1u) == *expected_bits)); |
| 3346 | + mse_ex += std::pow(expected_output - input, 2); |
| 3347 | + mse_vk += std::pow(output - input, 2); |
| 3348 | + } |
| 3349 | + |
| 3350 | + mse_ex /= output_data.size(); |
| 3351 | + mse_vk /= output_data.size(); |
| 3352 | + std::cout << "=========================================================" |
| 3353 | + << std::endl; |
| 3354 | + std::cout << "mse_ex = " << mse_ex << ", mse_vk = " << mse_vk << std::endl; |
| 3355 | +} |
| 3356 | + |
| 3357 | +TEST(VulkanComputeGraphOpsTest, test_to_copy) { |
| 3358 | + if(context()->adapter_ptr()->has_16bit_storage()) { |
| 3359 | + test_to_copy(); |
| 3360 | + } |
| 3361 | +} |
0 commit comments