Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

[SYCL] Update Sampler Linear Filter tests #179

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions SYCL/Sampler/basic-rw-float.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// RUN: %HOST_RUN_PLACEHOLDER %t.out %HOST_CHECK_PLACEHOLDER
// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER

#include <CL/sycl.hpp>

using namespace cl::sycl;

using pixelT = sycl::float4;

// will output a pixel as {r,g,b,a}. provide override if a different pixelT is
// defined.
void outputPixel(sycl::float4 somePixel) {
std::cout << "{" << somePixel[0] << "," << somePixel[1] << "," << somePixel[2]
<< "," << somePixel[3] << "} ";
}

// 4 pixels on a side. 1D at the moment
constexpr long width = 4;

void test_rw(image_channel_order ChanOrder, image_channel_type ChanType) {
int numTests = 4; // drives the size of the testResults buffer, and the number
// of report iterations. Kludge.

// we'll use these four pixels for our image. Makes it easy to measure
// interpolation and spot "off-by-one" probs.
// These values will work consistently with different levels of float
// precision (like unorm_int8 vs. fp32)
pixelT leftEdge{0.2f, 0.4f, 0.6f, 0.8f};
pixelT body{0.6f, 0.4f, 0.2f, 0.0f};
pixelT bony{0.2f, 0.4f, 0.6f, 0.8f};
pixelT rightEdge{0.6f, 0.4f, 0.2f, 0.0f};

queue Q;
const sycl::range<1> ImgRange_1D(width);
{ // closure
// - create an image
image<1> image_1D(ChanOrder, ChanType, ImgRange_1D);
event E_Setup = Q.submit([&](handler &cgh) {
auto image_acc = image_1D.get_access<pixelT, access::mode::write>(cgh);
cgh.single_task<class setupUnormLinear>([=]() {
image_acc.write(0, leftEdge);
image_acc.write(1, body);
image_acc.write(2, bony);
image_acc.write(3, rightEdge);
});
});
E_Setup.wait();

// use a buffer to report back test results.
buffer<pixelT, 1> testResults((range<1>(numTests)));

event E_Test = Q.submit([&](handler &cgh) {
auto image_acc = image_1D.get_access<pixelT, access::mode::read>(cgh);
auto test_acc = testResults.get_access<access::mode::write>(cgh);

cgh.single_task<class im1D_Unorm_Linear>([=]() {
int i = 0; // the index for writing into the testResult buffer.

// verify our four pixels were set up correctly.
// 0-3 read four pixels. no sampler
test_acc[i++] = image_acc.read(0); // {1,2,3,4}
test_acc[i++] = image_acc.read(1); // {49,48,47,46}
test_acc[i++] = image_acc.read(2); // {59,58,57,56}
test_acc[i++] = image_acc.read(3); // {11,12,13,14}

// Add more tests below. Just be sure to increase the numTests counter
// at the beginning of this function
});
});
E_Test.wait();

// REPORT RESULTS
auto test_acc = testResults.get_access<access::mode::read>();
for (int i = 0, idx = 0; i < numTests; i++, idx++) {
if (i == 0) {
idx = 0;
std::cout << "read four pixels, no sampler" << std::endl;
}

pixelT testPixel = test_acc[i];
std::cout << i << /* " -- " << idx << */ ": ";
outputPixel(testPixel);
std::cout << std::endl;
}
} // ~image / ~buffer
}

int main() {

queue Q;
device D = Q.get_device();

if (D.has(aspect::image)) {
// the _int8 channels are one byte per channel, or four bytes per pixel (for
// RGBA) the _int16/fp16 channels are two bytes per channel, or eight bytes
// per pixel (for RGBA) the _int32/fp32 channels are four bytes per
// channel, or sixteen bytes per pixel (for RGBA).

std::cout << "fp32 -------------" << std::endl;
test_rw(image_channel_order::rgba, image_channel_type::fp32);

// CUDA, strangely, does not support 8-bit channels. Turning this off for
// now.
// std::cout << "unorm_int8 -------" << std::endl;
// test_rw(image_channel_order::rgba, image_channel_type::unorm_int8);
} else {
std::cout << "device does not support image operations" << std::endl;
}

return 0;
}

// CHECK: fp32 -------------
// CHECK-NEXT: read four pixels, no sampler
// CHECK-NEXT: 0: {0.2,0.4,0.6,0.8}
// CHECK-NEXT: 1: {0.6,0.4,0.2,0}
// CHECK-NEXT: 2: {0.2,0.4,0.6,0.8}
// CHECK-NEXT: 3: {0.6,0.4,0.2,0}
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
// RUN: %HOST_RUN_PLACEHOLDER %t.out %HOST_CHECK_PLACEHOLDER
// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
// XFAIL: gpu && (level_zero || opencl || cuda)
// XFAIL: cpu

// GPU does not correctly interpolate when using clamp. Waiting on fix.
// Both OCL and LevelZero have this issue.
// CPU failing all linear interpolation at moment. Waiting on fix.
// CUDA fails all linear interpolation. Waiting on fix.
// UNSUPPORTED: level_zero && windows
// XFAIL: cuda

// LevelZero on Windows hangs with normalized coordinates. Waiting on fix.

// CUDA works with image_channel_type::fp32, but not with any 8-bit per channel
// type (such as unorm_int8)

/*
This file sets up an image, initializes it with data,
Expand All @@ -23,11 +24,11 @@
using namespace cl::sycl;

// pixel data-type for RGBA operations (which is the minimum image type)
using pixelT = sycl::uint4;
using pixelT = sycl::float4;

// will output a pixel as {r,g,b,a}. provide override if a different pixelT is
// defined.
void outputPixel(sycl::uint4 somePixel) {
void outputPixel(sycl::float4 somePixel) {
std::cout << "{" << somePixel[0] << "," << somePixel[1] << "," << somePixel[2]
<< "," << somePixel[3] << "} ";
}
Expand All @@ -47,10 +48,12 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,

// we'll use these four pixels for our image. Makes it easy to measure
// interpolation and spot "off-by-one" probs.
pixelT leftEdge{1, 2, 3, 4};
pixelT body{49, 48, 47, 46};
pixelT bony{59, 58, 57, 56};
pixelT rightEdge{11, 12, 13, 14};
// These values will work consistently with different levels of float
// precision (like unorm_int8 vs. fp32)
pixelT leftEdge{0.2f, 0.4f, 0.6f, 0.8f};
pixelT body{0.6f, 0.4f, 0.2f, 0.0f};
pixelT bony{0.2f, 0.4f, 0.6f, 0.8f};
pixelT rightEdge{0.6f, 0.4f, 0.2f, 0.0f};

queue Q;
const sycl::range<1> ImgRange_1D(width);
Expand Down Expand Up @@ -96,31 +99,32 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,
image_acc.read(-0.25f, Norm_Clamp_Linear_sampler); // {0,0,0,0}
test_acc[i++] = image_acc.read(
0.00f,
Norm_Clamp_Linear_sampler); // {0,1,2,2} // interpolating with bg
// color. consistent with unnormalized.
// Doesn't seem 100% correct to me, but
// don't ahve anything to compare
// against presnetly
test_acc[i++] =
image_acc.read(0.25f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
test_acc[i++] =
image_acc.read(0.50f, Norm_Clamp_Linear_sampler); // {54,53,52,51}
test_acc[i++] =
image_acc.read(0.75f, Norm_Clamp_Linear_sampler); // {35,35,35,35}
Norm_Clamp_Linear_sampler); // {0.1,0.2,0.3,0.4} // interpolating
// with bg color. consistent with
// unnormalized. Doesn't seem 100%
// correct to me, but don't ahve
// anything to compare against presnetly
test_acc[i++] = image_acc.read(
1.00f,
Norm_Clamp_Linear_sampler); // {6,6,6,7} // interpolating with bg
0.25f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
test_acc[i++] = image_acc.read(
0.50f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
test_acc[i++] = image_acc.read(
0.75f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
test_acc[i++] =
image_acc.read(1.00f,
Norm_Clamp_Linear_sampler); // {0.3,0.2,0.1,0} //
// interpolating with bg
test_acc[i++] =
image_acc.read(1.25f, Norm_Clamp_Linear_sampler); // {0,0,0,0}

// 7-8 read two pixels on either side of first pixel. float coordinates.
// CLAMP
// on GPU CLAMP is apparently stopping the interpolation. ( values on
// right are expected value)
test_acc[i++] =
image_acc.read(0.2499f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
test_acc[i++] =
image_acc.read(0.2501f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
test_acc[i++] = image_acc.read(
0.2499999f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
test_acc[i++] = image_acc.read(
0.2500001f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
});
});
E_Test.wait();
Expand All @@ -130,9 +134,10 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,
for (int i = 0, idx = 0; i < numTests; i++, idx++) {
if (i == 0) {
idx = -1;
std::cout << "read six pixels at 'boundary' locations, starting out of "
"bounds, sample: Normalized + Clamp + Linear"
<< std::endl;
std::cout
<< "read seven pixels at 'boundary' locations, starting out of "
"bounds, sample: Normalized + Clamp + Linear"
<< std::endl;
}
if (i == 7) {
idx = 1;
Expand Down Expand Up @@ -161,10 +166,14 @@ int main() {
// RGBA) the _int16/fp16 channels are two bytes per channel, or eight bytes
// per pixel (for RGBA) the _int32/fp32 channels are four bytes per
// channel, or sixteen bytes per pixel (for RGBA).
// CUDA has limited support for image_channel_type, so the tests use
// unsigned_int32

std::cout << "fp32 -------------" << std::endl;
test_normalized_clamp_linear_sampler(image_channel_order::rgba,
image_channel_type::unsigned_int32);
image_channel_type::fp32);

std::cout << "unorm_int8 -------" << std::endl;
test_normalized_clamp_linear_sampler(image_channel_order::rgba,
image_channel_type::unorm_int8);
} else {
std::cout << "device does not support image operations" << std::endl;
}
Expand All @@ -173,15 +182,28 @@ int main() {
}

// clang-format off
// CHECK: read six pixels at 'boundary' locations, starting out of bounds, sample: Normalized + Clamp + Linear
// CHECK: fp32 -------------
// CHECK-NEXT: read seven pixels at 'boundary' locations, starting out of bounds, sample: Normalized + Clamp + Linear
// CHECK-NEXT: 0 -- -1: {0,0,0,0}
// CHECK-NEXT: 1 -- 0: {0.1,0.2,0.3,0.4}
// CHECK-NEXT: 2 -- 1: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 3 -- 2: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 4 -- 3: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 5 -- 4: {0.3,0.2,0.1,0}
// CHECK-NEXT: 6 -- 5: {0,0,0,0}
// CHECK-NEXT: read two pixels on either side of first pixel. float coordinates. Normalized + Clamp + Linear
// CHECK-NEXT: 7 -- 1: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 8 -- 1: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: unorm_int8 -------
// CHECK-NEXT: read seven pixels at 'boundary' locations, starting out of bounds, sample: Normalized + Clamp + Linear
// CHECK-NEXT: 0 -- -1: {0,0,0,0}
// CHECK-NEXT: 1 -- 0: {0,1,2,2}
// CHECK-NEXT: 2 -- 1: {25,25,25,25}
// CHECK-NEXT: 3 -- 2: {54,53,52,51}
// CHECK-NEXT: 4 -- 3: {35,35,35,35}
// CHECK-NEXT: 5 -- 4: {6,6,6,7}
// CHECK-NEXT: 1 -- 0: {0.1,0.2,0.3,0.4}
// CHECK-NEXT: 2 -- 1: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 3 -- 2: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 4 -- 3: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 5 -- 4: {0.3,0.2,0.1,0}
// CHECK-NEXT: 6 -- 5: {0,0,0,0}
// CHECK-NEXT: read two pixels on either side of first pixel. float coordinates. Normalized + Clamp + Linear
// CHECK-NEXT: 7 -- 1: {25,25,25,25}
// CHECK-NEXT: 8 -- 1: {25,25,25,25}
// CHECK-NEXT: 7 -- 1: {0.4,0.4,0.4,0.4}
// CHECK-NEXT: 8 -- 1: {0.4,0.4,0.4,0.4}
// clang-format on
Loading