Skip to content

Commit f4a38c1

Browse files
committed
AMDGPU/GlobalISel: Look through casts when legalizing vector indexing
We were failing to find constants that were cast. I feel like the artifact combiner should have folded the constant into the trunc before the custom lowering, but that doesn't happen.
1 parent 00115d7 commit f4a38c1

File tree

3 files changed

+60
-15
lines changed

3 files changed

+60
-15
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,8 +1668,12 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
16681668
// TODO: Should move some of this into LegalizerHelper.
16691669

16701670
// TODO: Promote dynamic indexing of s16 to s32
1671-
// TODO: Dynamic s64 indexing is only legal for SGPR.
1672-
Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
1671+
1672+
// FIXME: Artifact combiner probably should have replaced the truncated
1673+
// constant before this, so we shouldn't need
1674+
// getConstantVRegValWithLookThrough.
1675+
Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
1676+
MI.getOperand(2).getReg(), MRI);
16731677
if (!IdxVal) // Dynamic case will be selected to register indexing.
16741678
return true;
16751679

@@ -1682,8 +1686,8 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
16821686

16831687
B.setInstr(MI);
16841688

1685-
if (IdxVal.getValue() < VecTy.getNumElements())
1686-
B.buildExtract(Dst, Vec, IdxVal.getValue() * EltTy.getSizeInBits());
1689+
if (IdxVal->Value < VecTy.getNumElements())
1690+
B.buildExtract(Dst, Vec, IdxVal->Value * EltTy.getSizeInBits());
16871691
else
16881692
B.buildUndef(Dst);
16891693

@@ -1697,8 +1701,12 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
16971701
// TODO: Should move some of this into LegalizerHelper.
16981702

16991703
// TODO: Promote dynamic indexing of s16 to s32
1700-
// TODO: Dynamic s64 indexing is only legal for SGPR.
1701-
Optional<int64_t> IdxVal = getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
1704+
1705+
// FIXME: Artifact combiner probably should have replaced the truncated
1706+
// constant before this, so we shouldn't need
1707+
// getConstantVRegValWithLookThrough.
1708+
Optional<ValueAndVReg> IdxVal = getConstantVRegValWithLookThrough(
1709+
MI.getOperand(3).getReg(), MRI);
17021710
if (!IdxVal) // Dynamic case will be selected to register indexing.
17031711
return true;
17041712

@@ -1712,8 +1720,8 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
17121720

17131721
B.setInstr(MI);
17141722

1715-
if (IdxVal.getValue() < VecTy.getNumElements())
1716-
B.buildInsert(Dst, Vec, Ins, IdxVal.getValue() * EltTy.getSizeInBits());
1723+
if (IdxVal->Value < VecTy.getNumElements())
1724+
B.buildInsert(Dst, Vec, Ins, IdxVal->Value * EltTy.getSizeInBits());
17171725
else
17181726
B.buildUndef(Dst);
17191727

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-extract-vector-elt.mir

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -275,9 +275,8 @@ body: |
275275
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV]], 1
276276
; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[UV1]], 1
277277
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SEXT_INREG]](s32), [[SEXT_INREG1]](s32)
278-
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[C]](s1)
279-
; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<2 x s32>), [[SEXT]](s32)
280-
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EVEC]](s32)
278+
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[BUILD_VECTOR]](<2 x s32>), 0
279+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[EXTRACT]](s32)
281280
; CHECK: $vgpr0 = COPY [[COPY1]](s32)
282281
%0:_(<2 x s1>) = G_IMPLICIT_DEF
283282
%1:_(s1) = G_CONSTANT i1 false
@@ -676,3 +675,22 @@ body: |
676675
%2:_(s64) = G_EXTRACT_VECTOR_ELT %0, %1
677676
$vgpr0_vgpr1 = COPY %2
678677
...
678+
679+
# Make sure we look through casts looking for a constant index.
680+
---
681+
name: extract_vector_elt_look_through_trunc_0_v4i32
682+
683+
body: |
684+
bb.0:
685+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
686+
; CHECK-LABEL: name: extract_vector_elt_look_through_trunc_0_v4i32
687+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
688+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
689+
; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY]](<4 x s32>), 0
690+
; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
691+
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
692+
%1:_(s64) = G_CONSTANT i64 0
693+
%2:_(s32) = G_TRUNC %1
694+
%3:_(s32) = G_EXTRACT_VECTOR_ELT %0, %2
695+
$vgpr0 = COPY %3
696+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-insert-vector-elt.mir

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,10 +127,8 @@ body: |
127127
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
128128
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2
129129
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
130-
; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
131-
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY2]], 8
132-
; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[COPY1]](s32), [[SEXT_INREG]](s32)
133-
; CHECK: $vgpr0_vgpr1 = COPY [[IVEC]](<2 x s32>)
130+
; CHECK: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
131+
; CHECK: $vgpr0_vgpr1 = COPY [[INSERT]](<2 x s32>)
134132
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
135133
%1:_(s32) = COPY $vgpr2
136134
%2:_(s8) = G_CONSTANT i8 0
@@ -161,3 +159,24 @@ body: |
161159
%5:_(<2 x s32>) = G_ANYEXT %4
162160
$vgpr0_vgpr1 = COPY %5
163161
...
162+
163+
---
164+
name: insert_vector_elt_v4s32_s32_look_through_trunc_0
165+
166+
body: |
167+
bb.0:
168+
liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
169+
170+
; CHECK-LABEL: name: insert_vector_elt_v4s32_s32_look_through_trunc_0
171+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
172+
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
173+
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
174+
; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s32>) = G_INSERT [[COPY]], [[COPY1]](s32), 0
175+
; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[INSERT]](<4 x s32>)
176+
%0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
177+
%1:_(s32) = COPY $vgpr4
178+
%2:_(s64) = G_CONSTANT i64 0
179+
%3:_(s32) = G_TRUNC %2
180+
%4:_(<4 x s32>) = G_INSERT_VECTOR_ELT %0, %1, %3
181+
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %4
182+
...

0 commit comments

Comments
 (0)