@@ -857,6 +857,80 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, floa
857
857
; Unchanged context declarations: the scalar-float and <2 x float> result
; variants of llvm.amdgcn.image.sample.c.d.o.2darray. Both take four `half`
; operands among their leading operands and carry attribute group #1.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32 (i32 , i32 , float , half , half , half , half , float , float , float , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 ) #1
858
858
declare <2 x float > @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32 (i32 , i32 , float , half , half , half , half , float , float , float , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 ) #1
859
859
860
; Test: 1D image sample with derivatives where the two derivative operands
; (%dsdh, %dsdv) and the coordinate (%s) are all `half`.
; Both check prefixes expect identical output: a single image_sample_d_g16
; carrying the a16 modifier, with a three-register address operand v[0:2] and
; no instruction before it in the block, followed by s_waitcnt vmcnt(0).
; The call passes i32 15 as its first operand; the checks show dmask:0xf.
; NOTE(review): GFX10 / GFX10GISEL presumably map to SelectionDAG and GlobalISel
; RUN lines, which are outside this hunk -- confirm.
+ define amdgpu_ps <4 x float > @sample_d_1d_g16_a16 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %dsdh , half %dsdv , half %s ) {
861
+ ; GFX10-LABEL: sample_d_1d_g16_a16:
862
+ ; GFX10: ; %bb.0: ; %main_body
863
+ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
864
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
865
+ ; GFX10-NEXT: ; return to shader part epilog
866
+ ;
867
+ ; GFX10GISEL-LABEL: sample_d_1d_g16_a16:
868
+ ; GFX10GISEL: ; %bb.0: ; %main_body
869
+ ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
870
+ ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
871
+ ; GFX10GISEL-NEXT: ; return to shader part epilog
872
+ main_body:
873
+ %v = call <4 x float > @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16 (i32 15 , half %dsdh , half %dsdv , half %s , <8 x i32 > %rsrc , <4 x i32 > %samp , i1 0 , i32 0 , i32 0 )
874
+ ret <4 x float > %v
875
+ }
876
+
877
; Test: 2D image sample with derivatives where all six operands (four
; derivatives %dsdh/%dtdh/%dsdv/%dtdv and two coordinates %s/%t) are `half`.
; Each prefix expects three v_perm_b32 instructions (selector 0x5040100), each
; combining one pair of registers out of v0..v5, and then one
; image_sample_d_g16 with the a16 modifier and a three-register address.
; The prefixes differ only in register assignment: GFX10 builds the address in
; v[2:4], GFX10GISEL builds it in place in v[0:2].
; NOTE(review): GFX10 / GFX10GISEL presumably map to SelectionDAG and GlobalISel
; RUN lines, which are outside this hunk -- confirm.
+ define amdgpu_ps <4 x float > @sample_d_2d_g16_a16 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %dsdh , half %dtdh , half %dsdv , half %dtdv , half %s , half %t ) {
878
+ ; GFX10-LABEL: sample_d_2d_g16_a16:
879
+ ; GFX10: ; %bb.0: ; %main_body
880
+ ; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
881
+ ; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100
882
+ ; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
883
+ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
884
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
885
+ ; GFX10-NEXT: ; return to shader part epilog
886
+ ;
887
+ ; GFX10GISEL-LABEL: sample_d_2d_g16_a16:
888
+ ; GFX10GISEL: ; %bb.0: ; %main_body
889
+ ; GFX10GISEL-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
890
+ ; GFX10GISEL-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
891
+ ; GFX10GISEL-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
892
+ ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
893
+ ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
894
+ ; GFX10GISEL-NEXT: ; return to shader part epilog
895
+ main_body:
896
+ %v = call <4 x float > @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16 (i32 15 , half %dsdh , half %dtdh , half %dsdv , half %dtdv , half %s , half %t , <8 x i32 > %rsrc , <4 x i32 > %samp , i1 0 , i32 0 , i32 0 )
897
+ ret <4 x float > %v
898
+ }
899
+
900
; Test: 3D image sample with derivatives where all nine operands (six
; derivatives %dsdh/%dtdh/%drdh/%dsdv/%dtdv/%drdv and three coordinates
; %s/%t/%r) are `half`.
; Each prefix expects a run of v_mov_b32 and v_perm_b32 (selector 0x5040100)
; instructions followed by one image_sample_d_g16 with the a16 modifier and a
; six-register address operand:
;   GFX10:      three v_mov_b32 + three v_perm_b32, address in v[7:12]
;   GFX10GISEL: four  v_mov_b32 + three v_perm_b32, address in v[2:7]
; NOTE(review): GFX10 / GFX10GISEL presumably map to SelectionDAG and GlobalISel
; RUN lines, which are outside this hunk -- confirm.
+ define amdgpu_ps <4 x float > @sample_d_3d_g16_a16 (<8 x i32 > inreg %rsrc , <4 x i32 > inreg %samp , half %dsdh , half %dtdh , half %drdh , half %dsdv , half %dtdv , half %drdv , half %s , half %t , half %r ) {
901
+ ; GFX10-LABEL: sample_d_3d_g16_a16:
902
+ ; GFX10: ; %bb.0: ; %main_body
903
+ ; GFX10-NEXT: v_mov_b32_e32 v12, v8
904
+ ; GFX10-NEXT: v_mov_b32_e32 v10, v5
905
+ ; GFX10-NEXT: v_mov_b32_e32 v8, v2
906
+ ; GFX10-NEXT: v_perm_b32 v11, v7, v6, 0x5040100
907
+ ; GFX10-NEXT: v_perm_b32 v9, v4, v3, 0x5040100
908
+ ; GFX10-NEXT: v_perm_b32 v7, v1, v0, 0x5040100
909
+ ; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
910
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
911
+ ; GFX10-NEXT: ; return to shader part epilog
912
+ ;
913
+ ; GFX10GISEL-LABEL: sample_d_3d_g16_a16:
914
+ ; GFX10GISEL: ; %bb.0: ; %main_body
915
+ ; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
916
+ ; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v7
917
+ ; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
918
+ ; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
919
+ ; GFX10GISEL-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
920
+ ; GFX10GISEL-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
921
+ ; GFX10GISEL-NEXT: v_perm_b32 v6, v10, v6, 0x5040100
922
+ ; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
923
+ ; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
924
+ ; GFX10GISEL-NEXT: ; return to shader part epilog
925
+ main_body:
926
+ %v = call <4 x float > @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16 (i32 15 , half %dsdh , half %dtdh , half %drdh , half %dsdv , half %dtdv , half %drdv , half %s , half %t , half %r , <8 x i32 > %rsrc , <4 x i32 > %samp , i1 0 , i32 0 , i32 0 )
927
+ ret <4 x float > %v
928
+ }
929
+
930
; Declarations of the intrinsics called by the sample_d_{1d,2d,3d}_g16_a16
; tests. Between the leading i32 and the <8 x i32> / <4 x i32> operands they
; take 3, 6 and 9 `half` operands respectively.
; NOTE(review): unlike the image.sample.c.d.o.2darray declarations earlier in
; this file, these carry no attribute group suffix (#1) -- confirm that is
; intentional.
+ declare <4 x float > @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16 (i32 , half , half , half , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 )
931
+ declare <4 x float > @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16 (i32 , half , half , half , half , half , half , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 )
932
+ declare <4 x float > @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16 (i32 , half , half , half , half , half , half , half , half , half , <8 x i32 >, <4 x i32 >, i1 , i32 , i32 )
933
+
860
934
; Attribute groups referenced through #N suffixes elsewhere in this file:
;   #0 = nounwind, #1 = nounwind readonly, #2 = nounwind readnone.
attributes #0 = { nounwind }
861
935
attributes #1 = { nounwind readonly }
862
936
attributes #2 = { nounwind readnone }
0 commit comments