@@ -834,6 +834,300 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
834
834
store i64 %and , ptr addrspace (1 ) %out , align 8
835
835
ret void
836
836
}
837
+
838
; Bitwise AND of two <2 x i128> values, both passed as function arguments.
; The expected output for every checked target is eight 32-bit v_and_b32
; instructions, pairing result dwords v0..v7 with operand dwords v8..v15.
; The assertion lines below look script-generated, so regenerate them rather
; than editing by hand (confirm against the header of this test file).
+ define <2 x i128 > @v_and_v2i128 (<2 x i128 > %a , <2 x i128 > %b ) {
839
+ ; GCN-LABEL: v_and_v2i128:
840
+ ; GCN: ; %bb.0:
841
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v8
843
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v9
844
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v10
845
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v11
846
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v12
847
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v13
848
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v14
849
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v15
850
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
851
+ ;
852
+ ; GFX10PLUS-LABEL: v_and_v2i128:
853
+ ; GFX10PLUS: ; %bb.0:
854
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v8
856
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v9
857
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v10
858
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v11
859
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v12
860
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v13
861
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v14
862
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v15
863
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
864
+ ;
865
+ ; GFX12-LABEL: v_and_v2i128:
866
+ ; GFX12: ; %bb.0:
867
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
868
+ ; GFX12-NEXT: s_wait_expcnt 0x0
869
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
870
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
871
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
872
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v8
873
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v9
874
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v10
875
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v11
876
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v12
877
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v13
878
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v14
879
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v15
880
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
881
+ %and = and <2 x i128 > %a , %b
882
+ ret <2 x i128 > %and
883
+ }
884
+
885
; Bitwise AND of a <2 x i128> argument with the constant splat <64, 64>.
; Only the lowest dword of each element (v0 and v4) is expected to be ANDed
; with the literal 64; the other six result dwords are expected to be set to 0.
; The GFX11 and GFX12 expectations pair these operations into v_dual_*
; instructions, while the GCN and GFX10 expectations use separate instructions.
; The assertion lines below look script-generated, so regenerate them rather
; than editing by hand (confirm against the header of this test file).
+ define <2 x i128 > @v_and_v2i128_inline_imm (<2 x i128 > %a ) {
886
+ ; GCN-LABEL: v_and_v2i128_inline_imm:
887
+ ; GCN: ; %bb.0:
888
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889
+ ; GCN-NEXT: v_and_b32_e32 v0, 64, v0
890
+ ; GCN-NEXT: v_and_b32_e32 v4, 64, v4
891
+ ; GCN-NEXT: v_mov_b32_e32 v1, 0
892
+ ; GCN-NEXT: v_mov_b32_e32 v2, 0
893
+ ; GCN-NEXT: v_mov_b32_e32 v3, 0
894
+ ; GCN-NEXT: v_mov_b32_e32 v5, 0
895
+ ; GCN-NEXT: v_mov_b32_e32 v6, 0
896
+ ; GCN-NEXT: v_mov_b32_e32 v7, 0
897
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
898
+ ;
899
+ ; GFX10-LABEL: v_and_v2i128_inline_imm:
900
+ ; GFX10: ; %bb.0:
901
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902
+ ; GFX10-NEXT: v_and_b32_e32 v0, 64, v0
903
+ ; GFX10-NEXT: v_and_b32_e32 v4, 64, v4
904
+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
905
+ ; GFX10-NEXT: v_mov_b32_e32 v2, 0
906
+ ; GFX10-NEXT: v_mov_b32_e32 v3, 0
907
+ ; GFX10-NEXT: v_mov_b32_e32 v5, 0
908
+ ; GFX10-NEXT: v_mov_b32_e32 v6, 0
909
+ ; GFX10-NEXT: v_mov_b32_e32 v7, 0
910
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
911
+ ;
912
+ ; GFX11-LABEL: v_and_v2i128_inline_imm:
913
+ ; GFX11: ; %bb.0:
914
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915
+ ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
916
+ ; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
917
+ ; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
918
+ ; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
919
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
920
+ ;
921
+ ; GFX12-LABEL: v_and_v2i128_inline_imm:
922
+ ; GFX12: ; %bb.0:
923
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
924
+ ; GFX12-NEXT: s_wait_expcnt 0x0
925
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
926
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
927
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
928
+ ; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
929
+ ; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
930
+ ; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
931
+ ; GFX12-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
932
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
933
+ %and = and <2 x i128 > %a , <i128 64 , i128 64 >
934
+ ret <2 x i128 > %and
935
+ }
936
+
937
; Bitwise AND of two <3 x i128> values (a non-power-of-two element count),
; both passed as function arguments.
; The expected output for every checked target is twelve 32-bit v_and_b32
; instructions, pairing result dwords v0..v11 with operand dwords v12..v23.
; The assertion lines below look script-generated, so regenerate them rather
; than editing by hand (confirm against the header of this test file).
+ define <3 x i128 > @v_and_v3i128 (<3 x i128 > %a , <3 x i128 > %b ) {
938
+ ; GCN-LABEL: v_and_v3i128:
939
+ ; GCN: ; %bb.0:
940
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v12
942
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v13
943
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v14
944
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v15
945
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v16
946
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v17
947
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v18
948
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v19
949
+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v20
950
+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v21
951
+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v22
952
+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v23
953
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
954
+ ;
955
+ ; GFX10PLUS-LABEL: v_and_v3i128:
956
+ ; GFX10PLUS: ; %bb.0:
957
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v12
959
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v13
960
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v14
961
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v15
962
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v16
963
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v17
964
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v18
965
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v19
966
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v8, v8, v20
967
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v9, v9, v21
968
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v10, v10, v22
969
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v11, v11, v23
970
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
971
+ ;
972
+ ; GFX12-LABEL: v_and_v3i128:
973
+ ; GFX12: ; %bb.0:
974
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
975
+ ; GFX12-NEXT: s_wait_expcnt 0x0
976
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
977
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
978
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
979
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v12
980
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v13
981
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v14
982
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v15
983
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v16
984
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v17
985
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v18
986
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v19
987
+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v20
988
+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v21
989
+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v22
990
+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v23
991
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
992
+ %and = and <3 x i128 > %a , %b
993
+ ret <3 x i128 > %and
994
+ }
995
+
996
; Bitwise AND of two single-element <1 x i128> vectors, both passed as
; function arguments.
; The expected output for every checked target is four 32-bit v_and_b32
; instructions, pairing result dwords v0..v3 with operand dwords v4..v7.
; The assertion lines below look script-generated, so regenerate them rather
; than editing by hand (confirm against the header of this test file).
+ define <1 x i128 > @v_and_v1i128 (<1 x i128 > %a , <1 x i128 > %b ) {
997
+ ; GCN-LABEL: v_and_v1i128:
998
+ ; GCN: ; %bb.0:
999
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v4
1001
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v5
1002
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v6
1003
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v7
1004
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1005
+ ;
1006
+ ; GFX10PLUS-LABEL: v_and_v1i128:
1007
+ ; GFX10PLUS: ; %bb.0:
1008
+ ; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
1010
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
1011
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
1012
+ ; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
1013
+ ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1014
+ ;
1015
+ ; GFX12-LABEL: v_and_v1i128:
1016
+ ; GFX12: ; %bb.0:
1017
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1018
+ ; GFX12-NEXT: s_wait_expcnt 0x0
1019
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1020
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1021
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1022
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v4
1023
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v5
1024
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v6
1025
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v7
1026
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1027
+ %and = and <1 x i128 > %a , %b
1028
+ ret <1 x i128 > %and
1029
+ }
1030
+
1031
; Bitwise AND of two <2 x i256> values, both passed as function arguments.
; Sixteen result dwords (v0..v15) are expected. Fifteen of the second
; operand's dwords are read from v16..v30, and the sixteenth is expected to be
; loaded from memory addressed by s32 (buffer_load_dword on GCN and GFX10,
; scratch_load_b32 on GFX11 and GFX12), followed by a load-counter wait
; before the final AND into v15.
; Presumably that dword is an argument that did not fit in registers and was
; passed on the stack - confirm against the AMDGPU calling convention.
; The assertion lines below look script-generated, so regenerate them rather
; than editing by hand (confirm against the header of this test file).
+ define <2 x i256 > @v_and_v2i256 (<2 x i256 > %a , <2 x i256 > %b ) {
1031
+ ; GCN-LABEL: v_and_v2i256:
1032
+ ; GCN: ; %bb.0:
1033
+ ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034
+ ; GCN-NEXT: v_and_b32_e32 v0, v0, v16
1035
+ ; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
1036
+ ; GCN-NEXT: v_and_b32_e32 v1, v1, v17
1037
+ ; GCN-NEXT: v_and_b32_e32 v2, v2, v18
1038
+ ; GCN-NEXT: v_and_b32_e32 v3, v3, v19
1039
+ ; GCN-NEXT: v_and_b32_e32 v4, v4, v20
1040
+ ; GCN-NEXT: v_and_b32_e32 v5, v5, v21
1041
+ ; GCN-NEXT: v_and_b32_e32 v6, v6, v22
1042
+ ; GCN-NEXT: v_and_b32_e32 v7, v7, v23
1043
+ ; GCN-NEXT: v_and_b32_e32 v8, v8, v24
1044
+ ; GCN-NEXT: v_and_b32_e32 v9, v9, v25
1045
+ ; GCN-NEXT: v_and_b32_e32 v10, v10, v26
1046
+ ; GCN-NEXT: v_and_b32_e32 v11, v11, v27
1047
+ ; GCN-NEXT: v_and_b32_e32 v12, v12, v28
1048
+ ; GCN-NEXT: v_and_b32_e32 v13, v13, v29
1049
+ ; GCN-NEXT: v_and_b32_e32 v14, v14, v30
1050
+ ; GCN-NEXT: s_waitcnt vmcnt(0)
1051
+ ; GCN-NEXT: v_and_b32_e32 v15, v15, v16
1052
+ ; GCN-NEXT: s_setpc_b64 s[30:31]
1053
+ ;
1054
+ ; GFX10-LABEL: v_and_v2i256:
1055
+ ; GFX10: ; %bb.0:
1056
+ ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057
+ ; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1058
+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v16
1059
+ ; GFX10-NEXT: v_and_b32_e32 v1, v1, v17
1060
+ ; GFX10-NEXT: v_and_b32_e32 v2, v2, v18
1061
+ ; GFX10-NEXT: v_and_b32_e32 v3, v3, v19
1062
+ ; GFX10-NEXT: v_and_b32_e32 v4, v4, v20
1063
+ ; GFX10-NEXT: v_and_b32_e32 v5, v5, v21
1064
+ ; GFX10-NEXT: v_and_b32_e32 v6, v6, v22
1065
+ ; GFX10-NEXT: v_and_b32_e32 v7, v7, v23
1066
+ ; GFX10-NEXT: v_and_b32_e32 v8, v8, v24
1067
+ ; GFX10-NEXT: v_and_b32_e32 v9, v9, v25
1068
+ ; GFX10-NEXT: v_and_b32_e32 v10, v10, v26
1069
+ ; GFX10-NEXT: v_and_b32_e32 v11, v11, v27
1070
+ ; GFX10-NEXT: v_and_b32_e32 v12, v12, v28
1071
+ ; GFX10-NEXT: v_and_b32_e32 v13, v13, v29
1072
+ ; GFX10-NEXT: v_and_b32_e32 v14, v14, v30
1073
+ ; GFX10-NEXT: s_waitcnt vmcnt(0)
1074
+ ; GFX10-NEXT: v_and_b32_e32 v15, v15, v31
1075
+ ; GFX10-NEXT: s_setpc_b64 s[30:31]
1076
+ ;
1077
+ ; GFX11-LABEL: v_and_v2i256:
1078
+ ; GFX11: ; %bb.0:
1079
+ ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1080
+ ; GFX11-NEXT: scratch_load_b32 v31, off, s32
1081
+ ; GFX11-NEXT: v_and_b32_e32 v0, v0, v16
1082
+ ; GFX11-NEXT: v_and_b32_e32 v1, v1, v17
1083
+ ; GFX11-NEXT: v_and_b32_e32 v2, v2, v18
1084
+ ; GFX11-NEXT: v_and_b32_e32 v3, v3, v19
1085
+ ; GFX11-NEXT: v_and_b32_e32 v4, v4, v20
1086
+ ; GFX11-NEXT: v_and_b32_e32 v5, v5, v21
1087
+ ; GFX11-NEXT: v_and_b32_e32 v6, v6, v22
1088
+ ; GFX11-NEXT: v_and_b32_e32 v7, v7, v23
1089
+ ; GFX11-NEXT: v_and_b32_e32 v8, v8, v24
1090
+ ; GFX11-NEXT: v_and_b32_e32 v9, v9, v25
1091
+ ; GFX11-NEXT: v_and_b32_e32 v10, v10, v26
1092
+ ; GFX11-NEXT: v_and_b32_e32 v11, v11, v27
1093
+ ; GFX11-NEXT: v_and_b32_e32 v12, v12, v28
1094
+ ; GFX11-NEXT: v_and_b32_e32 v13, v13, v29
1095
+ ; GFX11-NEXT: v_and_b32_e32 v14, v14, v30
1096
+ ; GFX11-NEXT: s_waitcnt vmcnt(0)
1097
+ ; GFX11-NEXT: v_and_b32_e32 v15, v15, v31
1098
+ ; GFX11-NEXT: s_setpc_b64 s[30:31]
1099
+ ;
1100
+ ; GFX12-LABEL: v_and_v2i256:
1101
+ ; GFX12: ; %bb.0:
1102
+ ; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1103
+ ; GFX12-NEXT: s_wait_expcnt 0x0
1104
+ ; GFX12-NEXT: s_wait_samplecnt 0x0
1105
+ ; GFX12-NEXT: s_wait_bvhcnt 0x0
1106
+ ; GFX12-NEXT: s_wait_kmcnt 0x0
1107
+ ; GFX12-NEXT: scratch_load_b32 v31, off, s32
1108
+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v16
1109
+ ; GFX12-NEXT: v_and_b32_e32 v1, v1, v17
1110
+ ; GFX12-NEXT: v_and_b32_e32 v2, v2, v18
1111
+ ; GFX12-NEXT: v_and_b32_e32 v3, v3, v19
1112
+ ; GFX12-NEXT: v_and_b32_e32 v4, v4, v20
1113
+ ; GFX12-NEXT: v_and_b32_e32 v5, v5, v21
1114
+ ; GFX12-NEXT: v_and_b32_e32 v6, v6, v22
1115
+ ; GFX12-NEXT: v_and_b32_e32 v7, v7, v23
1116
+ ; GFX12-NEXT: v_and_b32_e32 v8, v8, v24
1117
+ ; GFX12-NEXT: v_and_b32_e32 v9, v9, v25
1118
+ ; GFX12-NEXT: v_and_b32_e32 v10, v10, v26
1119
+ ; GFX12-NEXT: v_and_b32_e32 v11, v11, v27
1120
+ ; GFX12-NEXT: v_and_b32_e32 v12, v12, v28
1121
+ ; GFX12-NEXT: v_and_b32_e32 v13, v13, v29
1122
+ ; GFX12-NEXT: v_and_b32_e32 v14, v14, v30
1123
+ ; GFX12-NEXT: s_wait_loadcnt 0x0
1124
+ ; GFX12-NEXT: v_and_b32_e32 v15, v15, v31
1125
+ ; GFX12-NEXT: s_setpc_b64 s[30:31]
1126
+ %and = and <2 x i256 > %a , %b
1127
+ ret <2 x i256 > %and
1128
+ }
1130
+
837
1131
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
838
1132
; GFX11-FAKE16: {{.*}}
839
1133
; GFX11-TRUE16: {{.*}}
0 commit comments