Skip to content

Commit f4a38c1

Browse files
committed
AMDGPU/GlobalISel: Look through casts when legalizing vector indexing
We were failing to find constants that were cast. I feel like the artifact combiner should have folded the constant into the trunc before the custom lowering, but that doesn't happen.
1 parent 00115d7 commit f4a38c1

File tree

3 files changed

+60
-15
lines changed

3 files changed

+60
-15
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,8 +1668,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
16681668
// TODO: Should move some of this into LegalizerHelper.
16691669

16701670
// TODO: Promote dynamic indexing of s16 to s32
1671-
// TODO: Dynamic s64 indexing is only legal for SGPR.
1672-
Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
1671+
1672+
// FIXME: Artifact combiner probably should have replaced the truncated
1673+
// constant before this, so we shouldn't need
1674+
// getConstantVRegValWithLookThrough.
1675+
Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
1676+
MI.getOperand(2).getReg(), MRI);
16731677
if (!IdxVal) // Dynamic case will be selected to register indexing.
16741678
return true;
16751679

@@ -1682,8 +1686,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
16821686

16831687
B.setInstr(MI);
16841688

1685-
if (IdxVal.getValue() < VecTy.getNumElements())
1686-
B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
1689+
if (IdxVal->Value < VecTy.getNumElements())
1690+
B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
16871691
else
16881692
B.buildUndef(Dst);
16891693

@@ -1697,8 +1701,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
16971701
// TODO: Should move some of this into LegalizerHelper.
16981702

16991703
// TODO: Promote dynamic indexing of s16 to s32
1700-
// TODO: Dynamic s64 indexing is only legal for SGPR.
1701-
Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
1704+
1705+
// FIXME: Artifact combiner probably should have replaced the truncated
1706+
// constant before this, so we shouldn't need
1707+
// getConstantVRegValWithLookThrough.
1708+
Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
1709+
MI.getOperand(3).getReg(), MRI);
17021710
if (!IdxVal) // Dynamic case will be selected to register indexing.
17031711
return true;
17041712

@@ -1712,8 +1720,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
17121720

17131721
B.setInstr(MI);
17141722

1715-
if (IdxVal.getValue() < VecTy.getNumElements())
1716-
B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
1723+
if (IdxVal->Value < VecTy.getNumElements())
1724+
B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
17171725
else
17181726
B.buildUndef(Dst);
17191727

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,8 @@ body: |
275275
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
276276
; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
277277
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
278-
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
279-
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[SEXT]](s32)
280-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
278+
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
279+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
281280
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
282281
%0:_(<2 x s1>) = G_IMPLICIT_DEF
283282
%1:_(s1) = G_CONSTANT i1 false
@@ -676,3 +675,22 @@ body: |
676675
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
677676
$vgpr0_vgpr1 = COPY %2
678677
...
678+
679+
# Make sure we look through casts looking for a constant index.
680+
---
681+
name: extract_vector_elt_look_through_trunc_0_v4i32
682+
683+
body: |
684+
bb.0:
685+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
686+
; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32
687+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
688+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
689+
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
690+
; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
691+
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
692+
%1:_(s64) = G_CONSTANT i64 0
693+
%2:_(s32) = G_TRUNC %1
694+
%3:_(s32) = G_EXTRACT_VECTOR_ELT %0, %2
695+
$vgpr0 = COPY %3
696+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,8 @@ body: |
127127
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
128128
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
129129
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
130-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
131-
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
132-
; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[SEXT_INREG]](s32)
133-
; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
130+
; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
131+
; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
134132
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
135133
%1:_(s32) = COPY $vgpr2
136134
%2:_(s8) = G_CONSTANT i8 0
@@ -161,3 +159,24 @@ body: |
161159
%5:_(<2 x s32>) = G_ANYEXT %4
162160
$vgpr0_vgpr1 = COPY %5
163161
...
162+
163+
---
164+
name: insert_vector_elt_v4s32_s32_look_through_trunc_0
165+
166+
body: |
167+
bb.0:
168+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
169+
170+
; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
171+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
172+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
173+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
174+
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
175+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
176+
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
177+
%1:_(s32) = COPY $vgpr4
178+
%2:_(s64) = G_CONSTANT i64 0
179+
%3:_(s32) = G_TRUNC %2
180+
%4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %3
181+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
182+
...

0 commit comments

Comments
 (0)