
Commit 76ec3f0

[SYCL][Bindless][Exp] Add Support For Unsampled Image Arrays (#12464)
- Creation / destruction of unsampled image arrays
- Fetching / writing of unsampled image arrays
- `sycl::ext::oneapi::experimental::image_type::array` enum value added
- `sycl::ext::oneapi::experimental::image_descriptor::array_size` member added
- `sycl::ext::oneapi::experimental::image_descriptor::verify()` member function added

Corresponding UR PR: [[Bindless][Exp] Add Support For Image Arrays #1274](oneapi-src/unified-runtime#1274)
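For orientation, here is a minimal host/device sketch of how the new descriptor fields are meant to be used. Only `image_type::array`, `array_size`, and `verify()` are taken from this change; the descriptor constructor's argument order and the free functions (`image_mem`, `create_image`, `fetch_image_array`, `write_image_array`, `destroy_image_handle`) are assumptions based on the bindless images extension specification and may differ slightly in this revision.

// Sketch only: API names/signatures below are assumed from the
// bindless_images extension spec, not taken verbatim from this commit.
#include <sycl/sycl.hpp>
#include <sycl/ext/oneapi/bindless_images.hpp>

namespace syclexp = sycl::ext::oneapi::experimental;

int main() {
  sycl::queue q;
  constexpr size_t width = 512, height = 512, num_layers = 4;

  // Describe a 2D unsampled image array. image_type::array and array_size
  // are the new image_descriptor fields; verify() (also new) validates the
  // descriptor before use.
  syclexp::image_descriptor desc(sycl::range<2>{width, height},
                                 sycl::image_channel_order::r,
                                 sycl::image_channel_type::fp32,
                                 syclexp::image_type::array,
                                 /*num_levels=*/1, /*array_size=*/num_layers);
  desc.verify();

  // Allocate device image memory and create a bindless image handle
  // (assumed API: image_mem RAII wrapper + create_image).
  syclexp::image_mem img_mem(desc, q);
  syclexp::unsampled_image_handle img =
      syclexp::create_image(img_mem, desc, q);

  // Per-layer write / fetch of the image array (assumed entry points
  // write_image_array / fetch_image_array taking an extra layer index).
  q.parallel_for(sycl::range<2>{width, height}, [=](sycl::item<2> it) {
     sycl::int2 coord{static_cast<int>(it[0]), static_cast<int>(it[1])};
     for (unsigned int layer = 0; layer < num_layers; ++layer) {
       // Write a per-layer value, then fetch it back (illustrative only).
       syclexp::write_image_array(img, coord, layer, float(layer));
       float v = syclexp::fetch_image_array<float>(img, coord, layer);
       (void)v;
     }
   }).wait();

  syclexp::destroy_image_handle(img, q);
  return 0;
}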
1 parent d9c9cd1 commit 76ec3f0

15 files changed: +2754 -601 lines


libclc/ptx-nvidiacl/libspirv/images/image.cl

Lines changed: 1448 additions & 393 deletions
Large diffs are not rendered by default.

libclc/ptx-nvidiacl/libspirv/images/image_helpers.ll

Lines changed: 187 additions & 0 deletions
@@ -42,6 +42,14 @@ define <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %s) nounwind alwaysinline {
   ret <4 x i32> %v
 }
 
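+; Repack a two-element {i32,i32} aggregate return value into a <2 x i32>
+; vector via an alloca round-trip (companion to __clc_struct32_to_vector).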
+define <2 x i32> @__clc_struct32_to_vector2({i32,i32} %s) nounwind alwaysinline {
+  %a = alloca {i32,i32}
+  store {i32,i32} %s, {i32,i32}* %a
+  %bc = bitcast {i32,i32} * %a to <2 x i32> *
+  %v = load <2 x i32>, <2 x i32> * %bc, align 128
+  ret <2 x i32> %v
+}
+
 define <4 x float> @__clc_structf32_to_vector({float,float,float,float} %s) nounwind alwaysinline {
   %a = alloca {float,float,float,float}
   store {float,float,float,float} %s, {float,float,float,float}* %a
@@ -485,3 +493,182 @@ entry:
   %1 = tail call <4 x i32>@__clc_struct32_to_vector({i32,i32,i32,i32} %0)
   ret <4 x i32> %1
 }
+
+; <--- IMAGE ARRAYS --->
+
+; Surface Reads
+;
+; @llvm.nvvm.suld.<NDims>.array.v<NChannels><DType>.clamp
+;
+; <NDims> = { 1d, 2d, 3d }
+; <NChannels> = { 2, 4 }
+; <DType> = { i8, i16, i32 }
+;
+; Note: The case of NChannels=1 doesn't need to be handled here as it can be
+; called directly.
+
+
+; @llvm.nvvm.suld.<NDims>.array.v<NChannels>{i8, i16, i32}.clamp
+
+; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i8.clamp
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i8.clamp
+
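+; (The i8 variants below are declared with i16 element types, matching the
+; {i16,...} aggregates these NVVM intrinsics return.)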
+declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64, i32, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64, i32, i32, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i8.clamp
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64, i32, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64, i32, i32, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i16.clamp
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i16.clamp
+
+declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64, i32, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64, i32, i32, i32, i32)
+define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %0)
+  ret <2 x i16> %1
+}
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i16.clamp
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64, i32, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64, i32, i32, i32, i32)
+define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %0)
+  ret <4 x i16> %1
+}
+
+; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i32.clamp
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i32.clamp
+
+declare {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64, i32, i32)
+define <2 x i32> @__clc_llvm_nvvm_suld_1d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0)
+  ret <2 x i32> %1
+}
+
+declare {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64, i32, i32, i32)
+define <2 x i32> @__clc_llvm_nvvm_suld_2d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0)
+  ret <2 x i32> %1
+}
+
+declare {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64, i32, i32, i32, i32)
+define <2 x i32> @__clc_llvm_nvvm_suld_3d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %0)
+  ret <2 x i32> %1
+}
+
+; - @llvm.nvvm.suld.<NDims>.array.v4i32.clamp
+
+; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i32.clamp
+
+declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_suld_1d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x);
+  %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64, i32, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_suld_2d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y);
+  %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
+
+declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64, i32, i32, i32, i32)
+define <4 x i32> @__clc_llvm_nvvm_suld_3d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
+entry:
+  %0 = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z);
+  %1 = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %0)
+  ret <4 x i32> %1
+}
