[SDAG] fold insert_vector_elt with undef index

rotateright · rotateright · commit 85a2146c1559 · 2019-10-27T15:28:43.000-04:00
Similar to: rG4c47617627fb This makes the DAG behavior consistent with IR's insertelement. https://bugs.llvm.org/show_bug.cgi?id=42689 I've tried to maintain test intent for AArch64 and WebAssembly by replacing undef index operands with something else.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16574,10 +16574,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   SDValue EltNo = N->getOperand(2);
   SDLoc DL(N);
 
-  // If the inserted element is an UNDEF, just use the input vector.
-  if (InVal.isUndef())
-    return InVec;
-
   EVT VT = InVec.getValueType();
   unsigned NumElts = VT.getVectorNumElements();
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5506,6 +5506,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
     if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
       return getUNDEF(VT);
+
+    // Undefined index can be assumed out-of-bounds, so that's UNDEF too.
+    if (N3.isUndef())
+      return getUNDEF(VT);
+
+    // If the inserted element is an UNDEF, just use the input vector.
+    if (N2.isUndef())
+      return N1;
+
     break;
   }
   case ISD::INSERT_SUBVECTOR: {
diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
@@ -7,6 +7,8 @@
 
 
 ; CHECK: fmla
+; CHECK-NEXT: mov
+; CHECK-NEXT: mov
 ; CHECK-NEXT: fmla
 ; CHECK-NEXT: fmla
 ; CHECK-NEXT: fmla
@@ -16,33 +18,29 @@ target triple = "aarch64--linux-gnu"
 %Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }
 
 ; Function Attrs: nounwind
-define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 {
+define linkonce_odr void @func(%Struct* nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 {
 entry:
-  %0 = insertelement <4 x float> undef, float undef, i32 0
-  %1 = insertelement <4 x float> %0, float undef, i32 1
-  %2 = insertelement <4 x float> %1, float undef, i32 2
-  %3 = insertelement <4 x float> %2, float undef, i32 3
   %scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0
   %struct_ptr = bitcast float* %scevgep to i8*
   %vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr)
   %ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
-  %fm1 = fmul <4 x float> %0, %ev1
-  %av1 = fadd <4 x float> %1, %fm1
+  %fm1 = fmul <4 x float> %f, %ev1
+  %av1 = fadd <4 x float> %f, %fm1
   %ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
-  %fm2 = fmul <4 x float> %2, %ev2
-  %av2 = fadd <4 x float> %3, %fm2
+  %fm2 = fmul <4 x float> %f, %ev2
+  %av2 = fadd <4 x float> %f, %fm2
   %scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0
   %struct_ptr2 = bitcast float* %scevgep2 to i8*
   tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, i8* %struct_ptr2)
   %scevgep3 = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 12, i32 0
   %struct_ptr3 = bitcast float* %scevgep3 to i8*
   %vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3)
   %ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
-  %fm3 = fmul <4 x float> %0, %ev3
-  %av3 = fadd <4 x float> %1, %fm3
+  %fm3 = fmul <4 x float> %f, %ev3
+  %av3 = fadd <4 x float> %f, %fm3
   %ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
-  %fm4 = fmul <4 x float> %2, %ev4
-  %av4 = fadd <4 x float> %3, %fm4
+  %fm4 = fmul <4 x float> %f, %ev4
+  %av4 = fadd <4 x float> %f, %fm4
   %scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0
   %struct_ptr4 = bitcast float* %scevgep4 to i8*
   tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4)
diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll
@@ -193,13 +193,13 @@ define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) {
   ret <16 x i8> %res
 }
 
-; CHECK-LABEL: replace_undef_v16i8:
+; CHECK-LABEL: replace_zero_v16i8:
 ; NO-SIMD128-NOT: i8x16
-; SIMD128-NEXT: .functype replace_undef_v16i8 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}}
 ; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) {
-  %res = insertelement <16 x i8> %v, i8 %x, i32 undef
+define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) {
+  %res = insertelement <16 x i8> %v, i8 %x, i32 0
   ret <16 x i8> %res
 }
 
@@ -464,13 +464,13 @@ define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) {
   ret <8 x i16> %res
 }
 
-; CHECK-LABEL: replace_undef_v8i16:
+; CHECK-LABEL: replace_zero_v8i16:
 ; NO-SIMD128-NOT: i16x8
-; SIMD128-NEXT: .functype replace_undef_v8i16 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}}
 ; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) {
-  %res = insertelement <8 x i16> %v, i16 %x, i32 undef
+define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) {
+  %res = insertelement <8 x i16> %v, i16 %x, i32 0
   ret <8 x i16> %res
 }
 
@@ -625,13 +625,13 @@ define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) {
   ret <4 x i32> %res
 }
 
-; CHECK-LABEL: replace_undef_v4i32:
+; CHECK-LABEL: replace_zero_v4i32:
 ; NO-SIMD128-NOT: i32x4
-; SIMD128-NEXT: .functype replace_undef_v4i32 (v128, i32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}}
 ; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) {
-  %res = insertelement <4 x i32> %v, i32 %x, i32 undef
+define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) {
+  %res = insertelement <4 x i32> %v, i32 %x, i32 0
   ret <4 x i32> %res
 }
 
@@ -781,14 +781,14 @@ define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) {
   ret <2 x i64> %res
 }
 
-; CHECK-LABEL: replace_undef_v2i64:
+; CHECK-LABEL: replace_zero_v2i64:
 ; NO-SIMD128-NOT: i64x2
 ; SIMD128-VM-NOT: i64x2
-; SIMD128-NEXT: .functype replace_undef_v2i64 (v128, i64) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}}
 ; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) {
-  %res = insertelement <2 x i64> %v, i64 %x, i32 undef
+define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) {
+  %res = insertelement <2 x i64> %v, i64 %x, i32 0
   ret <2 x i64> %res
 }
 
@@ -931,13 +931,13 @@ define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) {
   ret <4 x float> %res
 }
 
-; CHECK-LABEL: replace_undef_v4f32:
+; CHECK-LABEL: replace_zero_v4f32:
 ; NO-SIMD128-NOT: f32x4
-; SIMD128-NEXT: .functype replace_undef_v4f32 (v128, f32) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}}
 ; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) {
-  %res = insertelement <4 x float> %v, float %x, i32 undef
+define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) {
+  %res = insertelement <4 x float> %v, float %x, i32 0
   ret <4 x float> %res
 }
 
@@ -1086,14 +1086,14 @@ define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) {
   ret <2 x double> %res
 }
 
-; CHECK-LABEL: replace_undef_v2f64:
+; CHECK-LABEL: replace_zero_v2f64:
 ; NO-SIMD128-NOT: f64x2
 ; SIMD128-VM-NOT: f64x2
-; SIMD128-NEXT: .functype replace_undef_v2f64 (v128, f64) -> (v128){{$}}
+; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}}
 ; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
 ; SIMD128-NEXT: return $pop[[R]]{{$}}
-define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) {
-  %res = insertelement <2 x double> %v, double %x, i32 undef
+define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) {
+  %res = insertelement <2 x double> %v, double %x, i32 0
   ret <2 x double> %res
 }
 
diff --git a/llvm/test/CodeGen/X86/insertelement-var-index.ll b/llvm/test/CodeGen/X86/insertelement-var-index.ll
@@ -3,6 +3,22 @@
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx    | FileCheck %s --check-prefixes=ALL,AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2   | FileCheck %s --check-prefixes=ALL,AVX,AVX2
 
+define <16 x i8> @undef_index(i8 %x) nounwind {
+; ALL-LABEL: undef_index:
+; ALL:       # %bb.0:
+; ALL-NEXT:    retq
+  %ins = insertelement <16 x i8> undef, i8 %x, i64 undef
+  ret <16 x i8> %ins
+}
+
+define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
+; ALL-LABEL: undef_scalar:
+; ALL:       # %bb.0:
+; ALL-NEXT:    retq
+  %ins = insertelement <16 x i8> %x, i8 undef, i32 %index
+  ret <16 x i8> %ins
+}
+
 define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
 ; SSE-LABEL: arg_i8_v16i8:
 ; SSE:       # %bb.0: