@@ -42,6 +42,14 @@ define <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %s) nounwind always
42
42
ret <4 x i32 > %v
43
43
}
44
44
45
; <2 x i32> __clc_struct32_to_vector2({i32,i32} s)
; Repack a two-element i32 aggregate (as returned by the NVVM v2i32 surface
; intrinsics) into a <2 x i32> vector by spilling the struct to a stack slot
; and reloading the same bytes through a vector-typed pointer.
; FIX(review): the load asserted `align 128` while the alloca carried only
; its natural ABI alignment; request the alignment on the alloca so the
; load's alignment claim is actually guaranteed.
define <2 x i32> @__clc_struct32_to_vector2({i32,i32} %s) nounwind alwaysinline {
  %a = alloca {i32,i32}, align 128
  store {i32,i32} %s, {i32,i32}* %a
  %bc = bitcast {i32,i32}* %a to <2 x i32>*
  %v = load <2 x i32>, <2 x i32>* %bc, align 128
  ret <2 x i32> %v
}
45
53
define <4 x float > @__clc_structf32_to_vector ({float ,float ,float ,float } %s ) nounwind alwaysinline {
46
54
%a = alloca {float ,float ,float ,float }
47
55
store {float ,float ,float ,float } %s , {float ,float ,float ,float }* %a
@@ -485,3 +493,182 @@ entry:
485
493
%1 = tail call <4 x i32 >@__clc_struct32_to_vector ({i32 ,i32 ,i32 ,i32 } %0 )
486
494
ret <4 x i32 > %1
487
495
}
496

; <--- IMAGE ARRAYS --->

; Surface Reads
;
; @llvm.nvvm.suld.<NDims>.array.v<NChannels><DType>.clamp
;
; <NDims>     = { 1d, 2d, 3d }
; <NChannels> = { 2, 4 }
; <DType>     = { i8, i16, i32 }
;
; Note: The case of NChannels=1 doesn't need to be handled here as it can be
; called directly.

; @llvm.nvvm.suld.<NDims>.array.v<NChannels>{i8, i16, i32}.clamp

; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i8.clamp

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i8.clamp
517
; 1D-array surface load, 2 channels of i8 (the intrinsic returns the
; channels widened to i16, as declared). Repacks the struct as a vector.
declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}
524
+
525
; 2D-array surface load, 2 channels of i8 (returned widened to i16).
declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64, i32, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}
532
+
533
; 3D-array surface load, 2 channels of i8 (returned widened to i16).
declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64, i32, i32, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i8.clamp

; 1D-array surface load, 4 channels of i8 (returned widened to i16).
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}
550
+
551
; 2D-array surface load, 4 channels of i8 (returned widened to i16).
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64, i32, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}
558
+
559
; 3D-array surface load, 4 channels of i8 (returned widened to i16).
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64, i32, i32, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i8_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i8.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}

; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i16.clamp

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i16.clamp

; 1D-array surface load, 2 channels of i16.
declare {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_1d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.1d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}
578
+
579
; 2D-array surface load, 2 channels of i16.
declare {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64, i32, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_2d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.2d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}
586
+
587
; 3D-array surface load, 2 channels of i16.
declare {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64, i32, i32, i32, i32)
define <2 x i16> @__clc_llvm_nvvm_suld_3d_array_v2i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16} @llvm.nvvm.suld.3d.array.v2i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <2 x i16> @__clc_struct16_to_vector2({i16,i16} %texel)
  ret <2 x i16> %vec
}

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i16.clamp

; 1D-array surface load, 4 channels of i16.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_1d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.1d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}
604
+
605
; 2D-array surface load, 4 channels of i16.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64, i32, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_2d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.2d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}
612
+
613
; 3D-array surface load, 4 channels of i16.
declare {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64, i32, i32, i32, i32)
define <4 x i16> @__clc_llvm_nvvm_suld_3d_array_v4i16_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i16,i16,i16,i16} @llvm.nvvm.suld.3d.array.v4i16.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <4 x i16> @__clc_struct16_to_vector({i16,i16,i16,i16} %texel)
  ret <4 x i16> %vec
}

; - @llvm.nvvm.suld.<NDims>.array.v{2, 4}i32.clamp

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v2i32.clamp

; 1D-array surface load, 2 channels of i32.
declare {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64, i32, i32)
define <2 x i32> @__clc_llvm_nvvm_suld_1d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32} @llvm.nvvm.suld.1d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %texel)
  ret <2 x i32> %vec
}
632
+
633
; 2D-array surface load, 2 channels of i32.
declare {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64, i32, i32, i32)
define <2 x i32> @__clc_llvm_nvvm_suld_2d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32} @llvm.nvvm.suld.2d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %texel)
  ret <2 x i32> %vec
}
640
+
641
; 3D-array surface load, 2 channels of i32.
declare {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64, i32, i32, i32, i32)
define <2 x i32> @__clc_llvm_nvvm_suld_3d_array_v2i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32} @llvm.nvvm.suld.3d.array.v2i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <2 x i32> @__clc_struct32_to_vector2({i32,i32} %texel)
  ret <2 x i32> %vec
}

; - @llvm.nvvm.suld.<NDims>.array.v4i32.clamp

; - - @llvm.nvvm.suld.{1d, 2d, 3d}.array.v4i32.clamp

; 1D-array surface load, 4 channels of i32.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64, i32, i32)
define <4 x i32> @__clc_llvm_nvvm_suld_1d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.1d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x)
  %vec = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %texel)
  ret <4 x i32> %vec
}
660
+
661
; 2D-array surface load, 4 channels of i32.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64, i32, i32, i32)
define <4 x i32> @__clc_llvm_nvvm_suld_2d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.2d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y)
  %vec = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %texel)
  ret <4 x i32> %vec
}
668
+
669
; 3D-array surface load, 4 channels of i32.
; FIX(review): the `ret` was fused onto the same line as the repack call
; (garbled paste); restored to one instruction per line, matching every
; sibling wrapper in this file.
declare {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64, i32, i32, i32, i32)
define <4 x i32> @__clc_llvm_nvvm_suld_3d_array_v4i32_clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z) nounwind alwaysinline {
entry:
  %texel = tail call {i32,i32,i32,i32} @llvm.nvvm.suld.3d.array.v4i32.clamp(i64 %img, i32 %idx, i32 %x, i32 %y, i32 %z)
  %vec = tail call <4 x i32> @__clc_struct32_to_vector({i32,i32,i32,i32} %texel)
  ret <4 x i32> %vec
}
0 commit comments