Skip to content

Commit b19897a

Browse files
committed
[LegalizeTypes][X86][PowerPC] Use shift by 1 instead of adding a value to itself to double.
Using a shift is the correct way to handle undef and works better with our optimizations that move freeze around. The X86 code looks like an improvement, but PowerPC might be a regression. Hoping this improves some code for #86850.
1 parent acab142 commit b19897a

File tree

4 files changed

+17
-15
lines changed

4 files changed

+17
-15
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -428,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
428428
std::swap(Lo, Hi);
429429

430430
SDValue Idx = N->getOperand(2);
431-
Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
431+
Idx = DAG.getNode(ISD::SHL, dl, Idx.getValueType(), Idx,
432+
DAG.getShiftAmountConstant(1, Idx.getValueType(), dl));
432433
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
433434
Idx = DAG.getNode(ISD::ADD, dl,
434435
Idx.getValueType(), Idx,

llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,12 +165,12 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
165165
;
166166
; CHECK-32-LABEL: testDoubleword:
167167
; CHECK-32: # %bb.0: # %entry
168-
; CHECK-32-NEXT: add 5, 6, 6
169168
; CHECK-32-NEXT: addi 7, 1, -32
169+
; CHECK-32-NEXT: slwi 5, 6, 1
170+
; CHECK-32-NEXT: rlwinm 6, 6, 3, 28, 28
170171
; CHECK-32-NEXT: stxv 34, -32(1)
171-
; CHECK-32-NEXT: rlwinm 6, 5, 2, 28, 29
172172
; CHECK-32-NEXT: stwx 3, 7, 6
173-
; CHECK-32-NEXT: addi 3, 5, 1
173+
; CHECK-32-NEXT: ori 3, 5, 1
174174
; CHECK-32-NEXT: addi 5, 1, -16
175175
; CHECK-32-NEXT: lxv 0, -32(1)
176176
; CHECK-32-NEXT: rlwinm 3, 3, 2, 28, 29
@@ -187,10 +187,11 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
187187
;
188188
; CHECK-32-P10-LABEL: testDoubleword:
189189
; CHECK-32-P10: # %bb.0: # %entry
190-
; CHECK-32-P10-NEXT: add 5, 6, 6
191-
; CHECK-32-P10-NEXT: slwi 6, 5, 2
190+
; CHECK-32-P10-NEXT: slwi 5, 6, 1
191+
; CHECK-32-P10-NEXT: slwi 6, 6, 3
192192
; CHECK-32-P10-NEXT: vinswlx 2, 6, 3
193-
; CHECK-32-P10-NEXT: addi 3, 5, 1
193+
; CHECK-32-P10-NEXT: li 3, 1
194+
; CHECK-32-P10-NEXT: rlwimi 3, 5, 0, 0, 30
194195
; CHECK-32-P10-NEXT: slwi 3, 3, 2
195196
; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
196197
; CHECK-32-P10-NEXT: blr

llvm/test/CodeGen/PowerPC/vec_insert_elt.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -241,14 +241,14 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
241241
;
242242
; AIX-P8-32-LABEL: testDoubleword:
243243
; AIX-P8-32: # %bb.0: # %entry
244-
; AIX-P8-32-NEXT: add r6, r6, r6
245244
; AIX-P8-32-NEXT: addi r5, r1, -32
246-
; AIX-P8-32-NEXT: rlwinm r7, r6, 2, 28, 29
245+
; AIX-P8-32-NEXT: slwi r7, r6, 1
246+
; AIX-P8-32-NEXT: rlwinm r6, r6, 3, 28, 28
247247
; AIX-P8-32-NEXT: stxvw4x v2, 0, r5
248-
; AIX-P8-32-NEXT: stwx r3, r5, r7
248+
; AIX-P8-32-NEXT: stwx r3, r5, r6
249249
; AIX-P8-32-NEXT: addi r3, r1, -16
250250
; AIX-P8-32-NEXT: lxvw4x vs0, 0, r5
251-
; AIX-P8-32-NEXT: addi r5, r6, 1
251+
; AIX-P8-32-NEXT: ori r5, r7, 1
252252
; AIX-P8-32-NEXT: rlwinm r5, r5, 2, 28, 29
253253
; AIX-P8-32-NEXT: stxvw4x vs0, 0, r3
254254
; AIX-P8-32-NEXT: stwx r4, r3, r5

llvm/test/CodeGen/X86/insertelement-var-index.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,7 @@ define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind {
10191019
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
10201020
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
10211021
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1022-
; X86AVX2-NEXT: incl %ecx
1022+
; X86AVX2-NEXT: orl $1, %ecx
10231023
; X86AVX2-NEXT: andl $3, %ecx
10241024
; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4)
10251025
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
@@ -1369,7 +1369,7 @@ define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 %y) nounwind {
13691369
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
13701370
; X86AVX2-NEXT: vmovaps (%esp), %xmm0
13711371
; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
1372-
; X86AVX2-NEXT: incl %eax
1372+
; X86AVX2-NEXT: orl $1, %eax
13731373
; X86AVX2-NEXT: andl $3, %eax
13741374
; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4)
13751375
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0
@@ -1754,7 +1754,7 @@ define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind {
17541754
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
17551755
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
17561756
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
1757-
; X86AVX2-NEXT: incl %ecx
1757+
; X86AVX2-NEXT: orl $1, %ecx
17581758
; X86AVX2-NEXT: andl $7, %ecx
17591759
; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4)
17601760
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0
@@ -2137,7 +2137,7 @@ define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
21372137
; X86AVX2-NEXT: movl %edx, (%esp,%esi,4)
21382138
; X86AVX2-NEXT: vmovaps (%esp), %ymm0
21392139
; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
2140-
; X86AVX2-NEXT: incl %eax
2140+
; X86AVX2-NEXT: orl $1, %eax
21412141
; X86AVX2-NEXT: andl $7, %eax
21422142
; X86AVX2-NEXT: movl %ecx, 32(%esp,%eax,4)
21432143
; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0

0 commit comments

Comments
 (0)