Skip to content

Commit 3a5af23

Browse files
authored
[GlobalISel][AMDGPU] Fix handling of v2i128 type for AND, OR, XOR (#138574)
Currently, GlobalISel handling of the v2i128 type for AND, OR, and XOR crashes the compiler. This bug was found using the AMDGPU Fuzzing project. Fixes SWDEV-508816.
1 parent 9d907a2 commit 3a5af23

File tree

4 files changed

+844
-6
lines changed

4 files changed

+844
-6
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -872,12 +872,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
872872
// Report legal for any types we can handle anywhere. For the cases only legal
873873
// on the SALU, RegBankSelect will be able to re-legalize.
874874
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
875-
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
876-
.clampScalar(0, S32, S64)
877-
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
878-
.fewerElementsIf(vectorWiderThan(0, 64), fewerEltsToSize64Vector(0))
879-
.widenScalarToNextPow2(0)
880-
.scalarize(0);
875+
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
876+
.clampScalar(0, S32, S64)
877+
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
878+
.fewerElementsIf(
879+
all(vectorWiderThan(0, 64), scalarOrEltNarrowerThan(0, 64)),
880+
fewerEltsToSize64Vector(0))
881+
.widenScalarToNextPow2(0)
882+
.scalarize(0);
881883

882884
getActionDefinitionsBuilder(
883885
{G_UADDO, G_USUBO, G_UADDE, G_SADDE, G_USUBE, G_SSUBE})

llvm/test/CodeGen/AMDGPU/GlobalISel/and.ll

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -834,6 +834,300 @@ define amdgpu_kernel void @s_and_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
834834
store i64 %and, ptr addrspace(1) %out, align 8
835835
ret void
836836
}
837+
838+
define <2 x i128> @v_and_v2i128(<2 x i128> %a, <2 x i128> %b) {
839+
; GCN-LABEL: v_and_v2i128:
840+
; GCN: ; %bb.0:
841+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
842+
; GCN-NEXT: v_and_b32_e32 v0, v0, v8
843+
; GCN-NEXT: v_and_b32_e32 v1, v1, v9
844+
; GCN-NEXT: v_and_b32_e32 v2, v2, v10
845+
; GCN-NEXT: v_and_b32_e32 v3, v3, v11
846+
; GCN-NEXT: v_and_b32_e32 v4, v4, v12
847+
; GCN-NEXT: v_and_b32_e32 v5, v5, v13
848+
; GCN-NEXT: v_and_b32_e32 v6, v6, v14
849+
; GCN-NEXT: v_and_b32_e32 v7, v7, v15
850+
; GCN-NEXT: s_setpc_b64 s[30:31]
851+
;
852+
; GFX10PLUS-LABEL: v_and_v2i128:
853+
; GFX10PLUS: ; %bb.0:
854+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
855+
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v8
856+
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v9
857+
; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v10
858+
; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v11
859+
; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v12
860+
; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v13
861+
; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v14
862+
; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v15
863+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
864+
;
865+
; GFX12-LABEL: v_and_v2i128:
866+
; GFX12: ; %bb.0:
867+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
868+
; GFX12-NEXT: s_wait_expcnt 0x0
869+
; GFX12-NEXT: s_wait_samplecnt 0x0
870+
; GFX12-NEXT: s_wait_bvhcnt 0x0
871+
; GFX12-NEXT: s_wait_kmcnt 0x0
872+
; GFX12-NEXT: v_and_b32_e32 v0, v0, v8
873+
; GFX12-NEXT: v_and_b32_e32 v1, v1, v9
874+
; GFX12-NEXT: v_and_b32_e32 v2, v2, v10
875+
; GFX12-NEXT: v_and_b32_e32 v3, v3, v11
876+
; GFX12-NEXT: v_and_b32_e32 v4, v4, v12
877+
; GFX12-NEXT: v_and_b32_e32 v5, v5, v13
878+
; GFX12-NEXT: v_and_b32_e32 v6, v6, v14
879+
; GFX12-NEXT: v_and_b32_e32 v7, v7, v15
880+
; GFX12-NEXT: s_setpc_b64 s[30:31]
881+
%and = and <2 x i128> %a, %b
882+
ret <2 x i128> %and
883+
}
884+
885+
define <2 x i128> @v_and_v2i128_inline_imm(<2 x i128> %a) {
886+
; GCN-LABEL: v_and_v2i128_inline_imm:
887+
; GCN: ; %bb.0:
888+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889+
; GCN-NEXT: v_and_b32_e32 v0, 64, v0
890+
; GCN-NEXT: v_and_b32_e32 v4, 64, v4
891+
; GCN-NEXT: v_mov_b32_e32 v1, 0
892+
; GCN-NEXT: v_mov_b32_e32 v2, 0
893+
; GCN-NEXT: v_mov_b32_e32 v3, 0
894+
; GCN-NEXT: v_mov_b32_e32 v5, 0
895+
; GCN-NEXT: v_mov_b32_e32 v6, 0
896+
; GCN-NEXT: v_mov_b32_e32 v7, 0
897+
; GCN-NEXT: s_setpc_b64 s[30:31]
898+
;
899+
; GFX10-LABEL: v_and_v2i128_inline_imm:
900+
; GFX10: ; %bb.0:
901+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902+
; GFX10-NEXT: v_and_b32_e32 v0, 64, v0
903+
; GFX10-NEXT: v_and_b32_e32 v4, 64, v4
904+
; GFX10-NEXT: v_mov_b32_e32 v1, 0
905+
; GFX10-NEXT: v_mov_b32_e32 v2, 0
906+
; GFX10-NEXT: v_mov_b32_e32 v3, 0
907+
; GFX10-NEXT: v_mov_b32_e32 v5, 0
908+
; GFX10-NEXT: v_mov_b32_e32 v6, 0
909+
; GFX10-NEXT: v_mov_b32_e32 v7, 0
910+
; GFX10-NEXT: s_setpc_b64 s[30:31]
911+
;
912+
; GFX11-LABEL: v_and_v2i128_inline_imm:
913+
; GFX11: ; %bb.0:
914+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
915+
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
916+
; GFX11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
917+
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
918+
; GFX11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
919+
; GFX11-NEXT: s_setpc_b64 s[30:31]
920+
;
921+
; GFX12-LABEL: v_and_v2i128_inline_imm:
922+
; GFX12: ; %bb.0:
923+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
924+
; GFX12-NEXT: s_wait_expcnt 0x0
925+
; GFX12-NEXT: s_wait_samplecnt 0x0
926+
; GFX12-NEXT: s_wait_bvhcnt 0x0
927+
; GFX12-NEXT: s_wait_kmcnt 0x0
928+
; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 64, v0
929+
; GFX12-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v4, 64, v4
930+
; GFX12-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v5, 0
931+
; GFX12-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
932+
; GFX12-NEXT: s_setpc_b64 s[30:31]
933+
%and = and <2 x i128> %a, <i128 64, i128 64>
934+
ret <2 x i128> %and
935+
}
936+
937+
define <3 x i128> @v_and_v3i128(<3 x i128> %a, <3 x i128> %b) {
938+
; GCN-LABEL: v_and_v3i128:
939+
; GCN: ; %bb.0:
940+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
941+
; GCN-NEXT: v_and_b32_e32 v0, v0, v12
942+
; GCN-NEXT: v_and_b32_e32 v1, v1, v13
943+
; GCN-NEXT: v_and_b32_e32 v2, v2, v14
944+
; GCN-NEXT: v_and_b32_e32 v3, v3, v15
945+
; GCN-NEXT: v_and_b32_e32 v4, v4, v16
946+
; GCN-NEXT: v_and_b32_e32 v5, v5, v17
947+
; GCN-NEXT: v_and_b32_e32 v6, v6, v18
948+
; GCN-NEXT: v_and_b32_e32 v7, v7, v19
949+
; GCN-NEXT: v_and_b32_e32 v8, v8, v20
950+
; GCN-NEXT: v_and_b32_e32 v9, v9, v21
951+
; GCN-NEXT: v_and_b32_e32 v10, v10, v22
952+
; GCN-NEXT: v_and_b32_e32 v11, v11, v23
953+
; GCN-NEXT: s_setpc_b64 s[30:31]
954+
;
955+
; GFX10PLUS-LABEL: v_and_v3i128:
956+
; GFX10PLUS: ; %bb.0:
957+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958+
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v12
959+
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v13
960+
; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v14
961+
; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v15
962+
; GFX10PLUS-NEXT: v_and_b32_e32 v4, v4, v16
963+
; GFX10PLUS-NEXT: v_and_b32_e32 v5, v5, v17
964+
; GFX10PLUS-NEXT: v_and_b32_e32 v6, v6, v18
965+
; GFX10PLUS-NEXT: v_and_b32_e32 v7, v7, v19
966+
; GFX10PLUS-NEXT: v_and_b32_e32 v8, v8, v20
967+
; GFX10PLUS-NEXT: v_and_b32_e32 v9, v9, v21
968+
; GFX10PLUS-NEXT: v_and_b32_e32 v10, v10, v22
969+
; GFX10PLUS-NEXT: v_and_b32_e32 v11, v11, v23
970+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
971+
;
972+
; GFX12-LABEL: v_and_v3i128:
973+
; GFX12: ; %bb.0:
974+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
975+
; GFX12-NEXT: s_wait_expcnt 0x0
976+
; GFX12-NEXT: s_wait_samplecnt 0x0
977+
; GFX12-NEXT: s_wait_bvhcnt 0x0
978+
; GFX12-NEXT: s_wait_kmcnt 0x0
979+
; GFX12-NEXT: v_and_b32_e32 v0, v0, v12
980+
; GFX12-NEXT: v_and_b32_e32 v1, v1, v13
981+
; GFX12-NEXT: v_and_b32_e32 v2, v2, v14
982+
; GFX12-NEXT: v_and_b32_e32 v3, v3, v15
983+
; GFX12-NEXT: v_and_b32_e32 v4, v4, v16
984+
; GFX12-NEXT: v_and_b32_e32 v5, v5, v17
985+
; GFX12-NEXT: v_and_b32_e32 v6, v6, v18
986+
; GFX12-NEXT: v_and_b32_e32 v7, v7, v19
987+
; GFX12-NEXT: v_and_b32_e32 v8, v8, v20
988+
; GFX12-NEXT: v_and_b32_e32 v9, v9, v21
989+
; GFX12-NEXT: v_and_b32_e32 v10, v10, v22
990+
; GFX12-NEXT: v_and_b32_e32 v11, v11, v23
991+
; GFX12-NEXT: s_setpc_b64 s[30:31]
992+
%and = and <3 x i128> %a, %b
993+
ret <3 x i128> %and
994+
}
995+
996+
define <1 x i128> @v_and_v1i128(<1 x i128> %a, <1 x i128> %b) {
997+
; GCN-LABEL: v_and_v1i128:
998+
; GCN: ; %bb.0:
999+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1000+
; GCN-NEXT: v_and_b32_e32 v0, v0, v4
1001+
; GCN-NEXT: v_and_b32_e32 v1, v1, v5
1002+
; GCN-NEXT: v_and_b32_e32 v2, v2, v6
1003+
; GCN-NEXT: v_and_b32_e32 v3, v3, v7
1004+
; GCN-NEXT: s_setpc_b64 s[30:31]
1005+
;
1006+
; GFX10PLUS-LABEL: v_and_v1i128:
1007+
; GFX10PLUS: ; %bb.0:
1008+
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1009+
; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v4
1010+
; GFX10PLUS-NEXT: v_and_b32_e32 v1, v1, v5
1011+
; GFX10PLUS-NEXT: v_and_b32_e32 v2, v2, v6
1012+
; GFX10PLUS-NEXT: v_and_b32_e32 v3, v3, v7
1013+
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
1014+
;
1015+
; GFX12-LABEL: v_and_v1i128:
1016+
; GFX12: ; %bb.0:
1017+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1018+
; GFX12-NEXT: s_wait_expcnt 0x0
1019+
; GFX12-NEXT: s_wait_samplecnt 0x0
1020+
; GFX12-NEXT: s_wait_bvhcnt 0x0
1021+
; GFX12-NEXT: s_wait_kmcnt 0x0
1022+
; GFX12-NEXT: v_and_b32_e32 v0, v0, v4
1023+
; GFX12-NEXT: v_and_b32_e32 v1, v1, v5
1024+
; GFX12-NEXT: v_and_b32_e32 v2, v2, v6
1025+
; GFX12-NEXT: v_and_b32_e32 v3, v3, v7
1026+
; GFX12-NEXT: s_setpc_b64 s[30:31]
1027+
%and = and <1 x i128> %a, %b
1028+
ret <1 x i128> %and
1029+
}
1030+
1031+
define <2 x i256> @v_and_v2i256(<2 x i256> %a, <2 x i256> %b) {
1032+
; GCN-LABEL: v_and_v2i256:
1033+
; GCN: ; %bb.0:
1034+
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035+
; GCN-NEXT: v_and_b32_e32 v0, v0, v16
1036+
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
1037+
; GCN-NEXT: v_and_b32_e32 v1, v1, v17
1038+
; GCN-NEXT: v_and_b32_e32 v2, v2, v18
1039+
; GCN-NEXT: v_and_b32_e32 v3, v3, v19
1040+
; GCN-NEXT: v_and_b32_e32 v4, v4, v20
1041+
; GCN-NEXT: v_and_b32_e32 v5, v5, v21
1042+
; GCN-NEXT: v_and_b32_e32 v6, v6, v22
1043+
; GCN-NEXT: v_and_b32_e32 v7, v7, v23
1044+
; GCN-NEXT: v_and_b32_e32 v8, v8, v24
1045+
; GCN-NEXT: v_and_b32_e32 v9, v9, v25
1046+
; GCN-NEXT: v_and_b32_e32 v10, v10, v26
1047+
; GCN-NEXT: v_and_b32_e32 v11, v11, v27
1048+
; GCN-NEXT: v_and_b32_e32 v12, v12, v28
1049+
; GCN-NEXT: v_and_b32_e32 v13, v13, v29
1050+
; GCN-NEXT: v_and_b32_e32 v14, v14, v30
1051+
; GCN-NEXT: s_waitcnt vmcnt(0)
1052+
; GCN-NEXT: v_and_b32_e32 v15, v15, v16
1053+
; GCN-NEXT: s_setpc_b64 s[30:31]
1054+
;
1055+
; GFX10-LABEL: v_and_v2i256:
1056+
; GFX10: ; %bb.0:
1057+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1058+
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
1059+
; GFX10-NEXT: v_and_b32_e32 v0, v0, v16
1060+
; GFX10-NEXT: v_and_b32_e32 v1, v1, v17
1061+
; GFX10-NEXT: v_and_b32_e32 v2, v2, v18
1062+
; GFX10-NEXT: v_and_b32_e32 v3, v3, v19
1063+
; GFX10-NEXT: v_and_b32_e32 v4, v4, v20
1064+
; GFX10-NEXT: v_and_b32_e32 v5, v5, v21
1065+
; GFX10-NEXT: v_and_b32_e32 v6, v6, v22
1066+
; GFX10-NEXT: v_and_b32_e32 v7, v7, v23
1067+
; GFX10-NEXT: v_and_b32_e32 v8, v8, v24
1068+
; GFX10-NEXT: v_and_b32_e32 v9, v9, v25
1069+
; GFX10-NEXT: v_and_b32_e32 v10, v10, v26
1070+
; GFX10-NEXT: v_and_b32_e32 v11, v11, v27
1071+
; GFX10-NEXT: v_and_b32_e32 v12, v12, v28
1072+
; GFX10-NEXT: v_and_b32_e32 v13, v13, v29
1073+
; GFX10-NEXT: v_and_b32_e32 v14, v14, v30
1074+
; GFX10-NEXT: s_waitcnt vmcnt(0)
1075+
; GFX10-NEXT: v_and_b32_e32 v15, v15, v31
1076+
; GFX10-NEXT: s_setpc_b64 s[30:31]
1077+
;
1078+
; GFX11-LABEL: v_and_v2i256:
1079+
; GFX11: ; %bb.0:
1080+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081+
; GFX11-NEXT: scratch_load_b32 v31, off, s32
1082+
; GFX11-NEXT: v_and_b32_e32 v0, v0, v16
1083+
; GFX11-NEXT: v_and_b32_e32 v1, v1, v17
1084+
; GFX11-NEXT: v_and_b32_e32 v2, v2, v18
1085+
; GFX11-NEXT: v_and_b32_e32 v3, v3, v19
1086+
; GFX11-NEXT: v_and_b32_e32 v4, v4, v20
1087+
; GFX11-NEXT: v_and_b32_e32 v5, v5, v21
1088+
; GFX11-NEXT: v_and_b32_e32 v6, v6, v22
1089+
; GFX11-NEXT: v_and_b32_e32 v7, v7, v23
1090+
; GFX11-NEXT: v_and_b32_e32 v8, v8, v24
1091+
; GFX11-NEXT: v_and_b32_e32 v9, v9, v25
1092+
; GFX11-NEXT: v_and_b32_e32 v10, v10, v26
1093+
; GFX11-NEXT: v_and_b32_e32 v11, v11, v27
1094+
; GFX11-NEXT: v_and_b32_e32 v12, v12, v28
1095+
; GFX11-NEXT: v_and_b32_e32 v13, v13, v29
1096+
; GFX11-NEXT: v_and_b32_e32 v14, v14, v30
1097+
; GFX11-NEXT: s_waitcnt vmcnt(0)
1098+
; GFX11-NEXT: v_and_b32_e32 v15, v15, v31
1099+
; GFX11-NEXT: s_setpc_b64 s[30:31]
1100+
;
1101+
; GFX12-LABEL: v_and_v2i256:
1102+
; GFX12: ; %bb.0:
1103+
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
1104+
; GFX12-NEXT: s_wait_expcnt 0x0
1105+
; GFX12-NEXT: s_wait_samplecnt 0x0
1106+
; GFX12-NEXT: s_wait_bvhcnt 0x0
1107+
; GFX12-NEXT: s_wait_kmcnt 0x0
1108+
; GFX12-NEXT: scratch_load_b32 v31, off, s32
1109+
; GFX12-NEXT: v_and_b32_e32 v0, v0, v16
1110+
; GFX12-NEXT: v_and_b32_e32 v1, v1, v17
1111+
; GFX12-NEXT: v_and_b32_e32 v2, v2, v18
1112+
; GFX12-NEXT: v_and_b32_e32 v3, v3, v19
1113+
; GFX12-NEXT: v_and_b32_e32 v4, v4, v20
1114+
; GFX12-NEXT: v_and_b32_e32 v5, v5, v21
1115+
; GFX12-NEXT: v_and_b32_e32 v6, v6, v22
1116+
; GFX12-NEXT: v_and_b32_e32 v7, v7, v23
1117+
; GFX12-NEXT: v_and_b32_e32 v8, v8, v24
1118+
; GFX12-NEXT: v_and_b32_e32 v9, v9, v25
1119+
; GFX12-NEXT: v_and_b32_e32 v10, v10, v26
1120+
; GFX12-NEXT: v_and_b32_e32 v11, v11, v27
1121+
; GFX12-NEXT: v_and_b32_e32 v12, v12, v28
1122+
; GFX12-NEXT: v_and_b32_e32 v13, v13, v29
1123+
; GFX12-NEXT: v_and_b32_e32 v14, v14, v30
1124+
; GFX12-NEXT: s_wait_loadcnt 0x0
1125+
; GFX12-NEXT: v_and_b32_e32 v15, v15, v31
1126+
; GFX12-NEXT: s_setpc_b64 s[30:31]
1127+
%and = and <2 x i256> %a, %b
1128+
ret <2 x i256> %and
1129+
}
1130+
8371131
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
8381132
; GFX11-FAKE16: {{.*}}
8391133
; GFX11-TRUE16: {{.*}}

0 commit comments

Comments
 (0)