Skip to content

Commit 85a2146

Browse files
committed
[SDAG] fold insert_vector_elt with undef index
Similar to rG4c47617627fb. This makes the DAG behavior consistent with IR's insertelement; see https://bugs.llvm.org/show_bug.cgi?id=42689. I've tried to maintain test intent for AArch64 and WebAssembly by replacing undef index operands with something else.
1 parent f067dd8 commit 85a2146

File tree

5 files changed

+60
-41
lines changed

5 files changed

+60
-41
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16574,10 +16574,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
1657416574
SDValue EltNo = N->getOperand(2);
1657516575
SDLoc DL(N);
1657616576

16577-
// If the inserted element is an UNDEF, just use the input vector.
16578-
if (InVal.isUndef())
16579-
return InVec;
16580-
1658116577
EVT VT = InVec.getValueType();
1658216578
unsigned NumElts = VT.getVectorNumElements();
1658316579

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5506,6 +5506,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
55065506
// INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
55075507
if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
55085508
return getUNDEF(VT);
5509+
5510+
// Undefined index can be assumed out-of-bounds, so that's UNDEF too.
5511+
if (N3.isUndef())
5512+
return getUNDEF(VT);
5513+
5514+
// If the inserted element is an UNDEF, just use the input vector.
5515+
if (N2.isUndef())
5516+
return N1;
5517+
55095518
break;
55105519
}
55115520
case ISD::INSERT_SUBVECTOR: {

llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88

99
; CHECK: fmla
10+
; CHECK-NEXT: mov
11+
; CHECK-NEXT: mov
1012
; CHECK-NEXT: fmla
1113
; CHECK-NEXT: fmla
1214
; CHECK-NEXT: fmla
@@ -16,33 +18,29 @@ target triple = "aarch64--linux-gnu"
1618
%Struct = type { i64*, [9 x double], [16 x {float, float}], [16 x {float, float}], i32, i32 }
1719

1820
; Function Attrs: nounwind
19-
define linkonce_odr void @func(%Struct* nocapture %this) unnamed_addr #0 align 2 {
21+
define linkonce_odr void @func(%Struct* nocapture %this, <4 x float> %f) unnamed_addr #0 align 2 {
2022
entry:
21-
%0 = insertelement <4 x float> undef, float undef, i32 0
22-
%1 = insertelement <4 x float> %0, float undef, i32 1
23-
%2 = insertelement <4 x float> %1, float undef, i32 2
24-
%3 = insertelement <4 x float> %2, float undef, i32 3
2523
%scevgep = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 8, i32 0
2624
%struct_ptr = bitcast float* %scevgep to i8*
2725
%vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr)
2826
%ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
29-
%fm1 = fmul <4 x float> %0, %ev1
30-
%av1 = fadd <4 x float> %1, %fm1
27+
%fm1 = fmul <4 x float> %f, %ev1
28+
%av1 = fadd <4 x float> %f, %fm1
3129
%ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
32-
%fm2 = fmul <4 x float> %2, %ev2
33-
%av2 = fadd <4 x float> %3, %fm2
30+
%fm2 = fmul <4 x float> %f, %ev2
31+
%av2 = fadd <4 x float> %f, %fm2
3432
%scevgep2 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 8, i32 0
3533
%struct_ptr2 = bitcast float* %scevgep2 to i8*
3634
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av2, <4 x float> %av1, i8* %struct_ptr2)
3735
%scevgep3 = getelementptr %Struct, %Struct* %this, i64 0, i32 2, i64 12, i32 0
3836
%struct_ptr3 = bitcast float* %scevgep3 to i8*
3937
%vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0i8(i8* %struct_ptr3)
4038
%ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
41-
%fm3 = fmul <4 x float> %0, %ev3
42-
%av3 = fadd <4 x float> %1, %fm3
39+
%fm3 = fmul <4 x float> %f, %ev3
40+
%av3 = fadd <4 x float> %f, %fm3
4341
%ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
44-
%fm4 = fmul <4 x float> %2, %ev4
45-
%av4 = fadd <4 x float> %3, %fm4
42+
%fm4 = fmul <4 x float> %f, %ev4
43+
%av4 = fadd <4 x float> %f, %fm4
4644
%scevgep4 = getelementptr %Struct, %Struct* %this, i64 0, i32 3, i64 12, i32 0
4745
%struct_ptr4 = bitcast float* %scevgep4 to i8*
4846
tail call void @llvm.aarch64.neon.st2.v4f32.p0i8(<4 x float> %av4, <4 x float> %av3, i8* %struct_ptr4)

llvm/test/CodeGen/WebAssembly/simd.ll

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,13 @@ define <16 x i8> @replace_var_v16i8(<16 x i8> %v, i32 %i, i8 %x) {
193193
ret <16 x i8> %res
194194
}
195195

196-
; CHECK-LABEL: replace_undef_v16i8:
196+
; CHECK-LABEL: replace_zero_v16i8:
197197
; NO-SIMD128-NOT: i8x16
198-
; SIMD128-NEXT: .functype replace_undef_v16i8 (v128, i32) -> (v128){{$}}
198+
; SIMD128-NEXT: .functype replace_zero_v16i8 (v128, i32) -> (v128){{$}}
199199
; SIMD128-NEXT: i8x16.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
200200
; SIMD128-NEXT: return $pop[[R]]{{$}}
201-
define <16 x i8> @replace_undef_v16i8(<16 x i8> %v, i8 %x) {
202-
%res = insertelement <16 x i8> %v, i8 %x, i32 undef
201+
define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) {
202+
%res = insertelement <16 x i8> %v, i8 %x, i32 0
203203
ret <16 x i8> %res
204204
}
205205

@@ -464,13 +464,13 @@ define <8 x i16> @replace_var_v8i16(<8 x i16> %v, i32 %i, i16 %x) {
464464
ret <8 x i16> %res
465465
}
466466

467-
; CHECK-LABEL: replace_undef_v8i16:
467+
; CHECK-LABEL: replace_zero_v8i16:
468468
; NO-SIMD128-NOT: i16x8
469-
; SIMD128-NEXT: .functype replace_undef_v8i16 (v128, i32) -> (v128){{$}}
469+
; SIMD128-NEXT: .functype replace_zero_v8i16 (v128, i32) -> (v128){{$}}
470470
; SIMD128-NEXT: i16x8.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
471471
; SIMD128-NEXT: return $pop[[R]]{{$}}
472-
define <8 x i16> @replace_undef_v8i16(<8 x i16> %v, i16 %x) {
473-
%res = insertelement <8 x i16> %v, i16 %x, i32 undef
472+
define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) {
473+
%res = insertelement <8 x i16> %v, i16 %x, i32 0
474474
ret <8 x i16> %res
475475
}
476476

@@ -625,13 +625,13 @@ define <4 x i32> @replace_var_v4i32(<4 x i32> %v, i32 %i, i32 %x) {
625625
ret <4 x i32> %res
626626
}
627627

628-
; CHECK-LABEL: replace_undef_v4i32:
628+
; CHECK-LABEL: replace_zero_v4i32:
629629
; NO-SIMD128-NOT: i32x4
630-
; SIMD128-NEXT: .functype replace_undef_v4i32 (v128, i32) -> (v128){{$}}
630+
; SIMD128-NEXT: .functype replace_zero_v4i32 (v128, i32) -> (v128){{$}}
631631
; SIMD128-NEXT: i32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
632632
; SIMD128-NEXT: return $pop[[R]]{{$}}
633-
define <4 x i32> @replace_undef_v4i32(<4 x i32> %v, i32 %x) {
634-
%res = insertelement <4 x i32> %v, i32 %x, i32 undef
633+
define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) {
634+
%res = insertelement <4 x i32> %v, i32 %x, i32 0
635635
ret <4 x i32> %res
636636
}
637637

@@ -781,14 +781,14 @@ define <2 x i64> @replace_var_v2i64(<2 x i64> %v, i32 %i, i64 %x) {
781781
ret <2 x i64> %res
782782
}
783783

784-
; CHECK-LABEL: replace_undef_v2i64:
784+
; CHECK-LABEL: replace_zero_v2i64:
785785
; NO-SIMD128-NOT: i64x2
786786
; SIMD128-VM-NOT: i64x2
787-
; SIMD128-NEXT: .functype replace_undef_v2i64 (v128, i64) -> (v128){{$}}
787+
; SIMD128-NEXT: .functype replace_zero_v2i64 (v128, i64) -> (v128){{$}}
788788
; SIMD128-NEXT: i64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
789789
; SIMD128-NEXT: return $pop[[R]]{{$}}
790-
define <2 x i64> @replace_undef_v2i64(<2 x i64> %v, i64 %x) {
791-
%res = insertelement <2 x i64> %v, i64 %x, i32 undef
790+
define <2 x i64> @replace_zero_v2i64(<2 x i64> %v, i64 %x) {
791+
%res = insertelement <2 x i64> %v, i64 %x, i32 0
792792
ret <2 x i64> %res
793793
}
794794

@@ -931,13 +931,13 @@ define <4 x float> @replace_var_v4f32(<4 x float> %v, i32 %i, float %x) {
931931
ret <4 x float> %res
932932
}
933933

934-
; CHECK-LABEL: replace_undef_v4f32:
934+
; CHECK-LABEL: replace_zero_v4f32:
935935
; NO-SIMD128-NOT: f32x4
936-
; SIMD128-NEXT: .functype replace_undef_v4f32 (v128, f32) -> (v128){{$}}
936+
; SIMD128-NEXT: .functype replace_zero_v4f32 (v128, f32) -> (v128){{$}}
937937
; SIMD128-NEXT: f32x4.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
938938
; SIMD128-NEXT: return $pop[[R]]{{$}}
939-
define <4 x float> @replace_undef_v4f32(<4 x float> %v, float %x) {
940-
%res = insertelement <4 x float> %v, float %x, i32 undef
939+
define <4 x float> @replace_zero_v4f32(<4 x float> %v, float %x) {
940+
%res = insertelement <4 x float> %v, float %x, i32 0
941941
ret <4 x float> %res
942942
}
943943

@@ -1086,14 +1086,14 @@ define <2 x double> @replace_var_v2f64(<2 x double> %v, i32 %i, double %x) {
10861086
ret <2 x double> %res
10871087
}
10881088

1089-
; CHECK-LABEL: replace_undef_v2f64:
1089+
; CHECK-LABEL: replace_zero_v2f64:
10901090
; NO-SIMD128-NOT: f64x2
10911091
; SIMD128-VM-NOT: f64x2
1092-
; SIMD128-NEXT: .functype replace_undef_v2f64 (v128, f64) -> (v128){{$}}
1092+
; SIMD128-NEXT: .functype replace_zero_v2f64 (v128, f64) -> (v128){{$}}
10931093
; SIMD128-NEXT: f64x2.replace_lane $push[[R:[0-9]+]]=, $0, 0, $1{{$}}
10941094
; SIMD128-NEXT: return $pop[[R]]{{$}}
1095-
define <2 x double> @replace_undef_v2f64(<2 x double> %v, double %x) {
1096-
%res = insertelement <2 x double> %v, double %x, i32 undef
1095+
define <2 x double> @replace_zero_v2f64(<2 x double> %v, double %x) {
1096+
%res = insertelement <2 x double> %v, double %x, i32 0
10971097
ret <2 x double> %res
10981098
}
10991099

llvm/test/CodeGen/X86/insertelement-var-index.ll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,22 @@
33
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1
44
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX2
55

6+
define <16 x i8> @undef_index(i8 %x) nounwind {
7+
; ALL-LABEL: undef_index:
8+
; ALL: # %bb.0:
9+
; ALL-NEXT: retq
10+
%ins = insertelement <16 x i8> undef, i8 %x, i64 undef
11+
ret <16 x i8> %ins
12+
}
13+
14+
define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind {
15+
; ALL-LABEL: undef_scalar:
16+
; ALL: # %bb.0:
17+
; ALL-NEXT: retq
18+
%ins = insertelement <16 x i8> %x, i8 undef, i32 %index
19+
ret <16 x i8> %ins
20+
}
21+
622
define <16 x i8> @arg_i8_v16i8(i8 %x, i32 %y) nounwind {
723
; SSE-LABEL: arg_i8_v16i8:
824
; SSE: # %bb.0:

0 commit comments

Comments
 (0)