Skip to content

Commit 0cf3c67

Browse files
committed
[LegalizeTypes][X86][PowerPC] Use shift by 1 instead of adding a value to itself to double.
Using a shift is the correct way to handle undef and works better with our optimizations that move freeze around. The X86 code looks like an improvement, but the PowerPC code might be a regression. Hoping this improves some code for #86850.
1 parent 2598aa6 commit 0cf3c67

File tree

4 files changed

+38
-38
lines changed

4 files changed

+38
-38
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
428428
std::swap(Lo, Hi);
429429

430430
SDValue Idx = N->getOperand(2);
431-
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
431+
Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
432+
DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));
432433
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
433434
Idx = DAG.getNode(ISD::ADD, dl,
434435
Idx.getValueType(), Idx,

llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,16 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
165165
;
166166
; CHECK-32-LABEL: testDoubleword:
167167
; CHECK-32: # %bb.0: # %entry
168-
; CHECK-32-NEXT: add 5, 6, 6
169168
; CHECK-32-NEXT: addi 7, 1, -32
169+
; CHECK-32-NEXT: rlwinm 5, 6, 3, 28, 28
170170
; CHECK-32-NEXT: stxv 34, -32(1)
171-
; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 29
172-
; CHECK-32-NEXT: stwx 3, 7, 6
173-
; CHECK-32-NEXT: addi 3, 5, 1
174-
; CHECK-32-NEXT: addi 5, 1, -16
171+
; CHECK-32-NEXT: stwx 3, 7, 5
172+
; CHECK-32-NEXT: slwi 3, 6, 1
173+
; CHECK-32-NEXT: li 5, 1
175174
; CHECK-32-NEXT: lxv 0, -32(1)
176-
; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
175+
; CHECK-32-NEXT: rlwimi 5, 3, 0, 0, 30
176+
; CHECK-32-NEXT: rlwinm 3, 5, 2, 28, 29
177+
; CHECK-32-NEXT: addi 5, 1, -16
177178
; CHECK-32-NEXT: stxv 0, -16(1)
178179
; CHECK-32-NEXT: stwx 4, 5, 3
179180
; CHECK-32-NEXT: lxv 34, -16(1)
@@ -187,10 +188,11 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
187188
;
188189
; CHECK-32-P10-LABEL: testDoubleword:
189190
; CHECK-32-P10: # %bb.0: # %entry
190-
; CHECK-32-P10-NEXT: add 5, 6, 6
191-
; CHECK-32-P10-NEXT: slwi 6, 5, 2
191+
; CHECK-32-P10-NEXT: slwi 5, 6, 1
192+
; CHECK-32-P10-NEXT: slwi 6, 6, 3
192193
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
193-
; CHECK-32-P10-NEXT: addi 3, 5, 1
194+
; CHECK-32-P10-NEXT: li 3, 1
195+
; CHECK-32-P10-NEXT: rlwimi 3, 5, 0, 0, 30
194196
; CHECK-32-P10-NEXT: slwi 3, 3, 2
195197
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
196198
; CHECK-32-P10-NEXT: blr

llvm/test/CodeGen/PowerPC/vec_insert_elt.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,15 +241,16 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
241241
;
242242
; AIX-P8-32-LABEL: testDoubleword:
243243
; AIX-P8-32: # %bb.0: # %entry
244-
; AIX-P8-32-NEXT: add r6, r6, r6
245244
; AIX-P8-32-NEXT: addi r5, r1, -32
246-
; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 29
245+
; AIX-P8-32-NEXT: rlwinm r7, r6, 3, 28, 28
247246
; AIX-P8-32-NEXT: stxvw4x v2, 0, r5
248247
; AIX-P8-32-NEXT: stwx r3, r5, r7
249248
; AIX-P8-32-NEXT: addi r3, r1, -16
250249
; AIX-P8-32-NEXT: lxvw4x vs0, 0, r5
251-
; AIX-P8-32-NEXT: addi r5, r6, 1
252-
; AIX-P8-32-NEXT: rlwinm r5, r5, 2, 28, 29
250+
; AIX-P8-32-NEXT: slwi r5, r6, 1
251+
; AIX-P8-32-NEXT: li r6, 1
252+
; AIX-P8-32-NEXT: rlwimi r6, r5, 0, 0, 30
253+
; AIX-P8-32-NEXT: rlwinm r5, r6, 2, 28, 29
253254
; AIX-P8-32-NEXT: stxvw4x vs0, 0, r3
254255
; AIX-P8-32-NEXT: stwx r4, r3, r5
255256
; AIX-P8-32-NEXT: lxvw4x v2, 0, r3

llvm/test/CodeGen/X86/insertelement-var-index.ll

Lines changed: 20 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1013,14 +1013,13 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
10131013
; X86AVX2-NEXT: movl 12(%ebp), %ecx
10141014
; X86AVX2-NEXT: movl 16(%ebp), %edx
10151015
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1016-
; X86AVX2-NEXT: leal (%edx,%edx), %esi
1017-
; X86AVX2-NEXT: andl $3, %esi
1018-
; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
1016+
; X86AVX2-NEXT: leal 1(%edx,%edx), %esi
1017+
; X86AVX2-NEXT: andl $1, %edx
1018+
; X86AVX2-NEXT: movl %eax, (%esp,%edx,8)
10191019
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
10201020
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1021-
; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
1022-
; X86AVX2-NEXT: andl $3, %eax
1023-
; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
1021+
; X86AVX2-NEXT: andl $3, %esi
1022+
; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4)
10241023
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
10251024
; X86AVX2-NEXT: leal -4(%ebp), %esp
10261025
; X86AVX2-NEXT: popl %esi
@@ -1362,14 +1361,13 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
13621361
; X86AVX2-NEXT: movl (%ecx), %edx
13631362
; X86AVX2-NEXT: movl 4(%ecx), %ecx
13641363
; X86AVX2-NEXT: vmovaps %xmm0, (%esp)
1365-
; X86AVX2-NEXT: leal (%eax,%eax), %esi
1366-
; X86AVX2-NEXT: andl $3, %esi
1367-
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
1364+
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
1365+
; X86AVX2-NEXT: andl $1, %eax
1366+
; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
13681367
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
13691368
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1370-
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
1371-
; X86AVX2-NEXT: andl $3, %eax
1372-
; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
1369+
; X86AVX2-NEXT: andl $3, %esi
1370+
; X86AVX2-NEXT: movl %ecx, 16(%esp,%esi,4)
13731371
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
13741372
; X86AVX2-NEXT: leal -4(%ebp), %esp
13751373
; X86AVX2-NEXT: popl %esi
@@ -1746,14 +1744,13 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
17461744
; X86AVX2-NEXT: movl 12(%ebp), %ecx
17471745
; X86AVX2-NEXT: movl 16(%ebp), %edx
17481746
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
1749-
; X86AVX2-NEXT: leal (%edx,%edx), %esi
1750-
; X86AVX2-NEXT: andl $7, %esi
1751-
; X86AVX2-NEXT: movl %eax, (%esp,%esi,4)
1747+
; X86AVX2-NEXT: leal 1(%edx,%edx), %esi
1748+
; X86AVX2-NEXT: andl $3, %edx
1749+
; X86AVX2-NEXT: movl %eax, (%esp,%edx,8)
17521750
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
17531751
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1754-
; X86AVX2-NEXT: leal 1(%edx,%edx), %eax
1755-
; X86AVX2-NEXT: andl $7, %eax
1756-
; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
1752+
; X86AVX2-NEXT: andl $7, %esi
1753+
; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4)
17571754
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
17581755
; X86AVX2-NEXT: leal -4(%ebp), %esp
17591756
; X86AVX2-NEXT: popl %esi
@@ -2128,14 +2125,13 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
21282125
; X86AVX2-NEXT: movl (%ecx), %edx
21292126
; X86AVX2-NEXT: movl 4(%ecx), %ecx
21302127
; X86AVX2-NEXT: vmovaps %ymm0, (%esp)
2131-
; X86AVX2-NEXT: leal (%eax,%eax), %esi
2132-
; X86AVX2-NEXT: andl $7, %esi
2133-
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
2128+
; X86AVX2-NEXT: leal 1(%eax,%eax), %esi
2129+
; X86AVX2-NEXT: andl $3, %eax
2130+
; X86AVX2-NEXT: movl %edx, (%esp,%eax,8)
21342131
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
21352132
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
2136-
; X86AVX2-NEXT: leal 1(%eax,%eax), %eax
2137-
; X86AVX2-NEXT: andl $7, %eax
2138-
; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
2133+
; X86AVX2-NEXT: andl $7, %esi
2134+
; X86AVX2-NEXT: movl %ecx, 32(%esp,%esi,4)
21392135
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
21402136
; X86AVX2-NEXT: leal -4(%ebp), %esp
21412137
; X86AVX2-NEXT: popl %esi

0 commit comments

Comments
 (0)