-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel] prevent G_UNMERGE_VALUES for vectors with different elements #133335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This commit adds support for using different source and destination vector element sizes for G_UNMERGE_VALUES, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project.
@llvm/pr-subscribers-llvm-globalisel Author: Robert Imschweiler (ro-i) Changes: This commit adds support for using different source and destination vector element sizes for G_UNMERGE_VALUES, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project. Full diff: https://github.com/llvm/llvm-project/pull/133335.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..4fcad22587f66 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8281,9 +8281,8 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
Register SrcReg = MI.getOperand(NumDst).getReg();
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(Dst0Reg);
- if (DstTy.isPointer())
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (DstTy.getScalarType().isPointer())
return UnableToLegalize; // TODO
SrcReg = coerceToScalar(SrcReg);
@@ -8293,14 +8292,25 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
// Expand scalarizing unmerge as bitcast to integer and shift.
LLT IntTy = MRI.getType(SrcReg);
- MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
-
- const unsigned DstSize = DstTy.getSizeInBits();
- unsigned Offset = DstSize;
- for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
- auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+ SmallVector<Register> VectorElems;
+ Register Shift;
+ for (unsigned I = 0, Offset = 0; I != NumDst; Offset += DstSize) {
+ if (Offset) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt).getReg(0);
+ } else {
+ Shift = SrcReg;
+ }
+ if (DstTy.isVector()) {
+ VectorElems.emplace_back(MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0));
+ if (VectorElems.size() == DstTy.getNumElements()) {
+ MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems);
+ VectorElems.clear();
+ }
+ } else {
+ MIRBuilder.buildTrunc(MI.getOperand(I++), Shift);
+ }
}
MI.eraseFromParent();
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index a7dbceb88c4c8..7cbf41038f6e4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1510,11 +1510,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
if (DstTy.isVector()) {
- // This case is the converse of G_CONCAT_VECTORS.
- if (!SrcTy.isVector() ||
- (SrcTy.getScalarType() != DstTy.getScalarType() &&
- !SrcTy.isPointerVector()) ||
- SrcTy.isScalableVector() != DstTy.isScalableVector() ||
+ // This case is the converse of G_CONCAT_VECTORS, but relaxed since
+ // G_UNMERGE_VALUES can handle src and dst vectors with different
+ // element sizes:
+ // %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ if (SrcTy.isScalableVector() != DstTy.isScalableVector() ||
SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
report("G_UNMERGE_VALUES source operand does not match vector "
"destination operands",
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 5eca04c02a9f9..96889f7a957b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -6508,3 +6508,58 @@ entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert
}
+
+; Found by fuzzer, reduced with llvm-reduce.
+define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
+; GPRIDX-LABEL: insert_very_small_from_very_large:
+; GPRIDX: ; %bb.0: ; %bb
+; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
+; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1
+; GPRIDX-NEXT: s_and_b32 s2, s2, 1
+; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: flat_store_byte v[0:1], v2
+; GPRIDX-NEXT: s_endpgm
+;
+; GFX10-LABEL: insert_very_small_from_very_large:
+; GFX10: ; %bb.0: ; %bb
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s2, s12, 1
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: s_and_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_lshl_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: flat_store_byte v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: insert_very_small_from_very_large:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s8, 1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_and_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v1, s1
+; GFX11-NEXT: s_lshl_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-NEXT: flat_store_b8 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+bb:
+ %0 = bitcast <32 x i16> %L3 to i512
+ %1 = trunc i512 %0 to i8
+ %2 = trunc i8 %1 to i2
+ %3 = bitcast i2 %2 to <2 x i1>
+ %I = insertelement <2 x i1> %3, i1 false, i32 0
+ store <2 x i1> %I, ptr %ptr, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index c231aa8334d45..3500df7c99b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -96,6 +96,41 @@ body: |
$vgpr1 = COPY %4
...
+---
+name: test_unmerge_v2s8_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v2s16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ %3:_(<2 x s16>) = G_ANYEXT %1
+ %4:_(<2 x s16>) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+ $vgpr1 = COPY %4
+...
+
---
name: test_unmerge_s16_v3s16
body: |
@@ -120,6 +155,50 @@ body: |
$vgpr2 = COPY %6
...
+---
+name: test_unmerge_v2s8_v3s16
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v3s16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C3]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+ %0:_(<3 x s16>) = G_IMPLICIT_DEF
+ %1:_(<2 x s8>), %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<2 x s16>) = G_ANYEXT %1
+ %5:_(<2 x s16>) = G_ANYEXT %2
+ %6:_(<2 x s16>) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2 = COPY %6
+...
+
---
name: test_unmerge_s16_v4s16
@@ -191,6 +270,62 @@ body: |
$vgpr5 = COPY %12
...
+---
+name: test_unmerge_v4s8_v6s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_unmerge_v4s8_v6s16
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s8)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s8)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s8)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32)
+ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8), [[UV32:%[0-9]+]]:_(s8), [[UV33:%[0-9]+]]:_(s8), [[UV34:%[0-9]+]]:_(s8), [[UV35:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s8)
+ ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s8)
+ ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s8)
+ ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR2]](<4 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<4 x s8>), %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<4 x s32>) = G_ANYEXT %1
+ %5:_(<4 x s32>) = G_ANYEXT %2
+ %6:_(<4 x s32>) = G_ANYEXT %3
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+ $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5
+ $vgpr8_vgpr9_vgpr10_vgpr11 = COPY %6
+...
+
+---
+name: test_unmerge_v3s32_v6s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_unmerge_v3s32_v6s16
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
---
name: test_unmerge_s8_s16
@@ -1090,3 +1225,23 @@ body: |
$vgpr9_vgpr10_vgpr11 = COPY %8
...
+
+---
+name: test_unmerge_v3s32_v12s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_unmerge_v3s32_v12s16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
+ %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ %1:_(<3 x s32>), %2:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+ $vgpr3_vgpr4_vgpr5 = COPY %2
+
+...
|
@llvm/pr-subscribers-backend-amdgpu Author: Robert Imschweiler (ro-i) Changes: This commit adds support for using different source and destination vector element sizes for G_UNMERGE_VALUES, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project. Full diff: https://github.com/llvm/llvm-project/pull/133335.diff 4 Files Affected:
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a9f80860124fb..4fcad22587f66 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -8281,9 +8281,8 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
const unsigned NumDst = MI.getNumOperands() - 1;
Register SrcReg = MI.getOperand(NumDst).getReg();
- Register Dst0Reg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(Dst0Reg);
- if (DstTy.isPointer())
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (DstTy.getScalarType().isPointer())
return UnableToLegalize; // TODO
SrcReg = coerceToScalar(SrcReg);
@@ -8293,14 +8292,25 @@ LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
// Expand scalarizing unmerge as bitcast to integer and shift.
LLT IntTy = MRI.getType(SrcReg);
- MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
-
- const unsigned DstSize = DstTy.getSizeInBits();
- unsigned Offset = DstSize;
- for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
- auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
- auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
- MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ const unsigned DstSize = DstTy.getScalarSizeInBits();
+ SmallVector<Register> VectorElems;
+ Register Shift;
+ for (unsigned I = 0, Offset = 0; I != NumDst; Offset += DstSize) {
+ if (Offset) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt).getReg(0);
+ } else {
+ Shift = SrcReg;
+ }
+ if (DstTy.isVector()) {
+ VectorElems.emplace_back(MIRBuilder.buildTrunc(DstTy.getScalarType(), Shift).getReg(0));
+ if (VectorElems.size() == DstTy.getNumElements()) {
+ MIRBuilder.buildBuildVector(MI.getOperand(I++), VectorElems);
+ VectorElems.clear();
+ }
+ } else {
+ MIRBuilder.buildTrunc(MI.getOperand(I++), Shift);
+ }
}
MI.eraseFromParent();
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index a7dbceb88c4c8..7cbf41038f6e4 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -1510,11 +1510,11 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(NumDsts).getReg());
if (DstTy.isVector()) {
- // This case is the converse of G_CONCAT_VECTORS.
- if (!SrcTy.isVector() ||
- (SrcTy.getScalarType() != DstTy.getScalarType() &&
- !SrcTy.isPointerVector()) ||
- SrcTy.isScalableVector() != DstTy.isScalableVector() ||
+ // This case is the converse of G_CONCAT_VECTORS, but relaxed since
+ // G_UNMERGE_VALUES can handle src and dst vectors with different
+ // element sizes:
+ // %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ if (SrcTy.isScalableVector() != DstTy.isScalableVector() ||
SrcTy.getSizeInBits() != NumDsts * DstTy.getSizeInBits())
report("G_UNMERGE_VALUES source operand does not match vector "
"destination operands",
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
index 5eca04c02a9f9..96889f7a957b2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll
@@ -6508,3 +6508,58 @@ entry:
%insert = insertelement <5 x double> %vec, double %val, i32 %idx
ret <5 x double> %insert
}
+
+; Found by fuzzer, reduced with llvm-reduce.
+define amdgpu_kernel void @insert_very_small_from_very_large(<32 x i16> %L3, ptr %ptr) {
+; GPRIDX-LABEL: insert_very_small_from_very_large:
+; GPRIDX: ; %bb.0: ; %bb
+; GPRIDX-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GPRIDX-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GPRIDX-NEXT: s_waitcnt lgkmcnt(0)
+; GPRIDX-NEXT: s_lshr_b32 s2, s12, 1
+; GPRIDX-NEXT: s_and_b32 s2, s2, 1
+; GPRIDX-NEXT: s_lshl_b32 s2, s2, 1
+; GPRIDX-NEXT: v_mov_b32_e32 v0, s0
+; GPRIDX-NEXT: v_mov_b32_e32 v2, s2
+; GPRIDX-NEXT: v_mov_b32_e32 v1, s1
+; GPRIDX-NEXT: flat_store_byte v[0:1], v2
+; GPRIDX-NEXT: s_endpgm
+;
+; GFX10-LABEL: insert_very_small_from_very_large:
+; GFX10: ; %bb.0: ; %bb
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx16 s[12:27], s[8:9], 0x0
+; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x40
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_lshr_b32 s2, s12, 1
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: s_and_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_lshl_b32 s2, s2, 1
+; GFX10-NEXT: v_mov_b32_e32 v2, s2
+; GFX10-NEXT: flat_store_byte v[0:1], v2
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: insert_very_small_from_very_large:
+; GFX11: ; %bb.0: ; %bb
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b512 s[8:23], s[4:5], 0x0
+; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x40
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_lshr_b32 s2, s8, 1
+; GFX11-NEXT: v_mov_b32_e32 v0, s0
+; GFX11-NEXT: s_and_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v1, s1
+; GFX11-NEXT: s_lshl_b32 s2, s2, 1
+; GFX11-NEXT: v_mov_b32_e32 v2, s2
+; GFX11-NEXT: flat_store_b8 v[0:1], v2
+; GFX11-NEXT: s_endpgm
+bb:
+ %0 = bitcast <32 x i16> %L3 to i512
+ %1 = trunc i512 %0 to i8
+ %2 = trunc i8 %1 to i2
+ %3 = bitcast i2 %2 to <2 x i1>
+ %I = insertelement <2 x i1> %3, i1 false, i32 0
+ store <2 x i1> %I, ptr %ptr, align 1
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
index c231aa8334d45..3500df7c99b6e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir
@@ -96,6 +96,41 @@ body: |
$vgpr1 = COPY %4
...
+---
+name: test_unmerge_v2s8_v2s16
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v2s16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C2]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST2]](<2 x s16>)
+ %0:_(<2 x s16>) = COPY $vgpr0
+ %1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)
+ %3:_(<2 x s16>) = G_ANYEXT %1
+ %4:_(<2 x s16>) = G_ANYEXT %2
+ $vgpr0 = COPY %3
+ $vgpr1 = COPY %4
+...
+
---
name: test_unmerge_s16_v3s16
body: |
@@ -120,6 +155,50 @@ body: |
$vgpr2 = COPY %6
...
+---
+name: test_unmerge_v2s8_v3s16
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: test_unmerge_v2s8_v3s16
+ ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[DEF]](<4 x s16>)
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C1]](s32)
+ ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C2]](s32)
+ ; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>)
+ ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32)
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST]], [[C3]]
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C3]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[LSHR1]], [[SHL1]]
+ ; CHECK-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C3]]
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[LSHR3]], [[C3]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C1]](s32)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL2]]
+ ; CHECK-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32)
+ ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST3]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr1 = COPY [[BITCAST4]](<2 x s16>)
+ ; CHECK-NEXT: $vgpr2 = COPY [[BITCAST5]](<2 x s16>)
+ %0:_(<3 x s16>) = G_IMPLICIT_DEF
+ %1:_(<2 x s8>), %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<2 x s16>) = G_ANYEXT %1
+ %5:_(<2 x s16>) = G_ANYEXT %2
+ %6:_(<2 x s16>) = G_ANYEXT %3
+ $vgpr0 = COPY %4
+ $vgpr1 = COPY %5
+ $vgpr2 = COPY %6
+...
+
---
name: test_unmerge_s16_v4s16
@@ -191,6 +270,62 @@ body: |
$vgpr5 = COPY %12
...
+---
+name: test_unmerge_v4s8_v6s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_unmerge_v4s8_v6s16
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8), [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8), [[UV20:%[0-9]+]]:_(s8), [[UV21:%[0-9]+]]:_(s8), [[UV22:%[0-9]+]]:_(s8), [[UV23:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV16]](s8)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV17]](s8)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV18]](s8)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV19]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT4]](s32), [[ANYEXT5]](s32), [[ANYEXT6]](s32), [[ANYEXT7]](s32)
+ ; CHECK-NEXT: [[UV24:%[0-9]+]]:_(s8), [[UV25:%[0-9]+]]:_(s8), [[UV26:%[0-9]+]]:_(s8), [[UV27:%[0-9]+]]:_(s8), [[UV28:%[0-9]+]]:_(s8), [[UV29:%[0-9]+]]:_(s8), [[UV30:%[0-9]+]]:_(s8), [[UV31:%[0-9]+]]:_(s8), [[UV32:%[0-9]+]]:_(s8), [[UV33:%[0-9]+]]:_(s8), [[UV34:%[0-9]+]]:_(s8), [[UV35:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV32]](s8)
+ ; CHECK-NEXT: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV33]](s8)
+ ; CHECK-NEXT: [[ANYEXT10:%[0-9]+]]:_(s32) = G_ANYEXT [[UV34]](s8)
+ ; CHECK-NEXT: [[ANYEXT11:%[0-9]+]]:_(s32) = G_ANYEXT [[UV35]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT8]](s32), [[ANYEXT9]](s32), [[ANYEXT10]](s32), [[ANYEXT11]](s32)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR1]](<4 x s32>)
+ ; CHECK-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = COPY [[BUILD_VECTOR2]](<4 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<4 x s8>), %2:_(<4 x s8>), %3:_(<4 x s8>) = G_UNMERGE_VALUES %0
+ %4:_(<4 x s32>) = G_ANYEXT %1
+ %5:_(<4 x s32>) = G_ANYEXT %2
+ %6:_(<4 x s32>) = G_ANYEXT %3
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
+ $vgpr4_vgpr5_vgpr6_vgpr7 = COPY %5
+ $vgpr8_vgpr9_vgpr10_vgpr11 = COPY %6
+...
+
+---
+name: test_unmerge_v3s32_v6s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: test_unmerge_v3s32_v6s16
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2
+ %1:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+...
+
---
name: test_unmerge_s8_s16
@@ -1090,3 +1225,23 @@ body: |
$vgpr9_vgpr10_vgpr11 = COPY %8
...
+
+---
+# Unmerges a <12 x s16> value read from $vgpr0..$vgpr5 into two <3 x s32>
+# results, which are copied back to $vgpr0_vgpr1_vgpr2 and $vgpr3_vgpr4_vgpr5.
+# The CHECK lines expect the G_UNMERGE_VALUES to come out of the pass unchanged.
+name: test_unmerge_v3s32_v12s16
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+
+ ; CHECK-LABEL: name: test_unmerge_v3s32_v12s16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<3 x s32>), [[UV1:%[0-9]+]]:_(<3 x s32>) = G_UNMERGE_VALUES [[COPY]](<12 x s16>)
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2 = COPY [[UV]](<3 x s32>)
+ ; CHECK-NEXT: $vgpr3_vgpr4_vgpr5 = COPY [[UV1]](<3 x s32>)
+ %0:_(<12 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
+ %1:_(<3 x s32>), %2:_(<3 x s32>) = G_UNMERGE_VALUES %0
+ $vgpr0_vgpr1_vgpr2 = COPY %1
+ $vgpr3_vgpr4_vgpr5 = COPY %2
+
+...
|
Ping |
Ping (already updated the commit message, so this seems like a nice, small fix that is adapted to the feedback) |
That case should already be rejected by MachineVerifier (llvm-project/llvm/lib/CodeGen/MachineVerifier.cpp, line 1507 in b2bf017).
|
But as far as I can see, the MachineVerifier cannot catch invalid instructions that are created during legalization and then handled within the same pass, which is what happens with my reproducer:
|
OK, but that's a technicality. The instructions are still invalid and should not have been created, so the correct fix is to not create them in the first place. |
Yes, that's what the current version of this PR tries to do. That's why I added the check in `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()`. |
OK, sounds good, but then please update the description. It's a bit misleading to say the patch rejects different source and destination vector elements for G_UNMERGE_VALUES. |
Thanks for the feedback, should be clearer now |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems reasonable
merged, thanks for the reviews! |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/18509 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/21042 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/10/builds/7567 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/20518 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/20578 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/187/builds/7020 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/25234 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/19588 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/30569 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/108/builds/14215 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/28721 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/168/builds/13258 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/95/builds/14652 Here is the relevant piece of the build log for the reference
|
something seems to be wrong. I'll investigate and reopen |
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/125/builds/8482 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/20372 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/55/builds/12940 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/35177 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/85/builds/10008 Here is the relevant piece of the build log for the reference
|
…with different elements" (#144650) Reverts llvm/llvm-project#133335
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/24/builds/9600 Here is the relevant piece of the build log for the reference
|
…ents (llvm#133335) This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()`, e.g.: `%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)` This LLVM defect was identified via the AMD Fuzzing project.
…ent elements" (llvm#144650) Reverts llvm#133335
This commit prevents building a G_UNMERGE_VALUES instruction with different source and destination vector elements in `LegalizationArtifactCombiner::ArtifactValueFinder::tryCombineMergeLike()`, e.g.:
`%1:_(<2 x s8>), %2:_(<2 x s8>) = G_UNMERGE_VALUES %0:_(<2 x s16>)`
This LLVM defect was identified via the AMD Fuzzing project.
(Updated commit message after implementing a different approach according to reviews.)