intel · vladimirlaz · Mar 16, 2021 · Mar 12, 2021 · Mar 12, 2021
@@ -0,0 +1,120 @@
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
+// RUN: %HOST_RUN_PLACEHOLDER %t.out %HOST_CHECK_PLACEHOLDER
+// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
+// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
+
+#include <CL/sycl.hpp>
+
+using namespace cl::sycl;
+
+using pixelT = sycl::float4;
+
+// will output a pixel as {r,g,b,a}.  provide override if a different pixelT is
+// defined.
+void outputPixel(sycl::float4 somePixel) {
+  std::cout << "{" << somePixel[0] << "," << somePixel[1] << "," << somePixel[2]
+            << "," << somePixel[3] << "} ";
+}
+
+// 4 pixels on a side. 1D at the moment
+constexpr long width = 4;
+
+void test_rw(image_channel_order ChanOrder, image_channel_type ChanType) {
+  int numTests = 4; // drives the size of the testResults buffer, and the number
+                    // of report iterations. Kludge.
+
+  // we'll use these four pixels for our image. Makes it easy to measure
+  // interpolation and spot "off-by-one" probs.
+  // These values will work consistently with different levels of float
+  // precision (like unorm_int8 vs. fp32)
+  pixelT leftEdge{0.2f, 0.4f, 0.6f, 0.8f};
+  pixelT body{0.6f, 0.4f, 0.2f, 0.0f};
+  pixelT bony{0.2f, 0.4f, 0.6f, 0.8f};
+  pixelT rightEdge{0.6f, 0.4f, 0.2f, 0.0f};
+
+  queue Q;
+  const sycl::range<1> ImgRange_1D(width);
+  { // closure
+    // - create an image
+    image<1> image_1D(ChanOrder, ChanType, ImgRange_1D);
+    event E_Setup = Q.submit([&](handler &cgh) {
+      auto image_acc = image_1D.get_access<pixelT, access::mode::write>(cgh);
+      cgh.single_task<class setupUnormLinear>([=]() {
+        image_acc.write(0, leftEdge);
+        image_acc.write(1, body);
+        image_acc.write(2, bony);
+        image_acc.write(3, rightEdge);
+      });
+    });
+    E_Setup.wait();
+
+    // use a buffer to report back test results.
+    buffer<pixelT, 1> testResults((range<1>(numTests)));
+
+    event E_Test = Q.submit([&](handler &cgh) {
+      auto image_acc = image_1D.get_access<pixelT, access::mode::read>(cgh);
+      auto test_acc = testResults.get_access<access::mode::write>(cgh);
+
+      cgh.single_task<class im1D_Unorm_Linear>([=]() {
+        int i = 0; // the index for writing into the testResult buffer.
+
+        // verify our four pixels were set up correctly.
+        // 0-3 read four pixels. no sampler
+        test_acc[i++] = image_acc.read(0); // {1,2,3,4}
+        test_acc[i++] = image_acc.read(1); // {49,48,47,46}
+        test_acc[i++] = image_acc.read(2); // {59,58,57,56}
+        test_acc[i++] = image_acc.read(3); // {11,12,13,14}
+
+        // Add more tests below. Just be sure to increase the numTests counter
+        // at the beginning of this function
+      });
+    });
+    E_Test.wait();
+
+    // REPORT RESULTS
+    auto test_acc = testResults.get_access<access::mode::read>();
+    for (int i = 0, idx = 0; i < numTests; i++, idx++) {
+      if (i == 0) {
+        idx = 0;
+        std::cout << "read four pixels, no sampler" << std::endl;
+      }
+
+      pixelT testPixel = test_acc[i];
+      std::cout << i << /* " -- " << idx << */ ": ";
+      outputPixel(testPixel);
+      std::cout << std::endl;
+    }
+  } // ~image / ~buffer
+}
+
+int main() {
+
+  queue Q;
+  device D = Q.get_device();
+
+  if (D.has(aspect::image)) {
+    // the _int8 channels are one byte per channel, or four bytes per pixel (for
+    // RGBA) the _int16/fp16 channels are two bytes per channel, or eight bytes
+    // per pixel (for RGBA) the _int32/fp32  channels are four bytes per
+    // channel, or sixteen bytes per pixel (for RGBA).
+
+    std::cout << "fp32 -------------" << std::endl;
+    test_rw(image_channel_order::rgba, image_channel_type::fp32);
+
+    // CUDA, strangely, does not support 8-bit channels. Turning this off for
+    // now.
+    // std::cout << "unorm_int8 -------" << std::endl;
+    // test_rw(image_channel_order::rgba, image_channel_type::unorm_int8);
+  } else {
+    std::cout << "device does not support image operations" << std::endl;
+  }
+
+  return 0;
+}
+
+// CHECK: fp32 -------------
+// CHECK-NEXT: read four pixels, no sampler
+// CHECK-NEXT: 0: {0.2,0.4,0.6,0.8}
+// CHECK-NEXT: 1: {0.6,0.4,0.2,0}
+// CHECK-NEXT: 2: {0.2,0.4,0.6,0.8}
+// CHECK-NEXT: 3: {0.6,0.4,0.2,0}
@@ -2,13 +2,14 @@
 // RUN: %HOST_RUN_PLACEHOLDER %t.out %HOST_CHECK_PLACEHOLDER
 // RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER
 // RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER
-// XFAIL: gpu && (level_zero || opencl || cuda)
-// XFAIL: cpu
 
-// GPU does not correctly interpolate when using clamp.  Waiting on fix.
-// Both OCL and LevelZero have this issue.
-// CPU failing all linear interpolation at moment. Waiting on fix.
-// CUDA fails all linear interpolation. Waiting on fix.
+// UNSUPPORTED: level_zero && windows
+// XFAIL: cuda
+
+// LevelZero on Windows hangs with normalized coordinates. Waiting on fix.
+
+// CUDA works with image_channel_type::fp32, but not with any 8-bit per channel
+// type (such as unorm_int8)
 
 /*
     This file sets up an image, initializes it with data,
@@ -23,11 +24,11 @@
 using namespace cl::sycl;
 
 // pixel data-type for RGBA operations (which is the minimum image type)
-using pixelT = sycl::uint4;
+using pixelT = sycl::float4;
 
 // will output a pixel as {r,g,b,a}.  provide override if a different pixelT is
 // defined.
-void outputPixel(sycl::uint4 somePixel) {
+void outputPixel(sycl::float4 somePixel) {
   std::cout << "{" << somePixel[0] << "," << somePixel[1] << "," << somePixel[2]
             << "," << somePixel[3] << "} ";
 }
@@ -47,10 +48,12 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,
 
   // we'll use these four pixels for our image. Makes it easy to measure
   // interpolation and spot "off-by-one" probs.
-  pixelT leftEdge{1, 2, 3, 4};
-  pixelT body{49, 48, 47, 46};
-  pixelT bony{59, 58, 57, 56};
-  pixelT rightEdge{11, 12, 13, 14};
+  // These values will work consistently with different levels of float
+  // precision (like unorm_int8 vs. fp32)
+  pixelT leftEdge{0.2f, 0.4f, 0.6f, 0.8f};
+  pixelT body{0.6f, 0.4f, 0.2f, 0.0f};
+  pixelT bony{0.2f, 0.4f, 0.6f, 0.8f};
+  pixelT rightEdge{0.6f, 0.4f, 0.2f, 0.0f};
 
   queue Q;
   const sycl::range<1> ImgRange_1D(width);
@@ -96,31 +99,32 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,
             image_acc.read(-0.25f, Norm_Clamp_Linear_sampler); // {0,0,0,0}
         test_acc[i++] = image_acc.read(
             0.00f,
-            Norm_Clamp_Linear_sampler); // {0,1,2,2} // interpolating with bg
-                                        // color. consistent with unnormalized.
-                                        // Doesn't seem 100% correct to me, but
-                                        // don't ahve anything to compare
-                                        // against presnetly
-        test_acc[i++] =
-            image_acc.read(0.25f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
-        test_acc[i++] =
-            image_acc.read(0.50f, Norm_Clamp_Linear_sampler); // {54,53,52,51}
-        test_acc[i++] =
-            image_acc.read(0.75f, Norm_Clamp_Linear_sampler); // {35,35,35,35}
+            Norm_Clamp_Linear_sampler); // {0.1,0.2,0.3,0.4} // interpolating
+                                        // with bg color. consistent with
+                                        // unnormalized. Doesn't seem 100%
+                                        // correct to me, but don't ahve
+                                        // anything to compare against presnetly
         test_acc[i++] = image_acc.read(
-            1.00f,
-            Norm_Clamp_Linear_sampler); // {6,6,6,7}  // interpolating with bg
+            0.25f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
+        test_acc[i++] = image_acc.read(
+            0.50f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
+        test_acc[i++] = image_acc.read(
+            0.75f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
+        test_acc[i++] =
+            image_acc.read(1.00f,
+                           Norm_Clamp_Linear_sampler); // {0.3,0.2,0.1,0}  //
+                                                       // interpolating with bg
         test_acc[i++] =
             image_acc.read(1.25f, Norm_Clamp_Linear_sampler); // {0,0,0,0}
 
         // 7-8 read two pixels on either side of first pixel. float coordinates.
         // CLAMP
         //  on GPU CLAMP is apparently stopping the interpolation. ( values on
         //  right are expected value)
-        test_acc[i++] =
-            image_acc.read(0.2499f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
-        test_acc[i++] =
-            image_acc.read(0.2501f, Norm_Clamp_Linear_sampler); // {25,25,25,25}
+        test_acc[i++] = image_acc.read(
+            0.2499999f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
+        test_acc[i++] = image_acc.read(
+            0.2500001f, Norm_Clamp_Linear_sampler); // {0.4,0.4,0.4,0.4}
       });
     });
     E_Test.wait();
@@ -130,9 +134,10 @@ void test_normalized_clamp_linear_sampler(image_channel_order ChanOrder,
     for (int i = 0, idx = 0; i < numTests; i++, idx++) {
       if (i == 0) {
         idx = -1;
-        std::cout << "read six pixels at 'boundary' locations, starting out of "
-                     "bounds,  sample:   Normalized +  Clamp  + Linear"
-                  << std::endl;
+        std::cout
+            << "read seven pixels at 'boundary' locations, starting out of "
+               "bounds,  sample:   Normalized +  Clamp  + Linear"
+            << std::endl;
       }
       if (i == 7) {
         idx = 1;
@@ -161,10 +166,14 @@ int main() {
     // RGBA) the _int16/fp16 channels are two bytes per channel, or eight bytes
     // per pixel (for RGBA) the _int32/fp32  channels are four bytes per
     // channel, or sixteen bytes per pixel (for RGBA).
-    // CUDA has limited support for image_channel_type, so the tests use
-    // unsigned_int32
+
+    std::cout << "fp32 -------------" << std::endl;
     test_normalized_clamp_linear_sampler(image_channel_order::rgba,
-                                         image_channel_type::unsigned_int32);
+                                         image_channel_type::fp32);
+
+    std::cout << "unorm_int8 -------" << std::endl;
+    test_normalized_clamp_linear_sampler(image_channel_order::rgba,
+                                         image_channel_type::unorm_int8);
   } else {
     std::cout << "device does not support image operations" << std::endl;
   }
@@ -173,15 +182,28 @@ int main() {
 }
 
 // clang-format off
-// CHECK: read six pixels at 'boundary' locations, starting out of bounds,  sample:   Normalized +  Clamp  + Linear
+// CHECK: fp32 -------------
+// CHECK-NEXT: read seven pixels at 'boundary' locations, starting out of bounds,  sample:   Normalized +  Clamp  + Linear
+// CHECK-NEXT: 0 -- -1: {0,0,0,0} 
+// CHECK-NEXT: 1 -- 0: {0.1,0.2,0.3,0.4} 
+// CHECK-NEXT: 2 -- 1: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 3 -- 2: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 4 -- 3: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 5 -- 4: {0.3,0.2,0.1,0} 
+// CHECK-NEXT: 6 -- 5: {0,0,0,0} 
+// CHECK-NEXT: read two pixels on either side of first pixel. float coordinates. Normalized +  Clamp  + Linear
+// CHECK-NEXT: 7 -- 1: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 8 -- 1: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: unorm_int8 -------
+// CHECK-NEXT: read seven pixels at 'boundary' locations, starting out of bounds,  sample:   Normalized +  Clamp  + Linear
 // CHECK-NEXT: 0 -- -1: {0,0,0,0} 
-// CHECK-NEXT: 1 -- 0: {0,1,2,2} 
-// CHECK-NEXT: 2 -- 1: {25,25,25,25} 
-// CHECK-NEXT: 3 -- 2: {54,53,52,51} 
-// CHECK-NEXT: 4 -- 3: {35,35,35,35} 
-// CHECK-NEXT: 5 -- 4: {6,6,6,7} 
+// CHECK-NEXT: 1 -- 0: {0.1,0.2,0.3,0.4} 
+// CHECK-NEXT: 2 -- 1: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 3 -- 2: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 4 -- 3: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 5 -- 4: {0.3,0.2,0.1,0} 
 // CHECK-NEXT: 6 -- 5: {0,0,0,0} 
 // CHECK-NEXT: read two pixels on either side of first pixel. float coordinates. Normalized +  Clamp  + Linear
-// CHECK-NEXT: 7 -- 1: {25,25,25,25} 
-// CHECK-NEXT: 8 -- 1: {25,25,25,25}
+// CHECK-NEXT: 7 -- 1: {0.4,0.4,0.4,0.4} 
+// CHECK-NEXT: 8 -- 1: {0.4,0.4,0.4,0.4}
 // clang-format on