Skip to content

Commit 249ed6f

Browse files
committed
[DAG] Extend extract_element(bitcast(scalar_to_vector(X))) -> trunc(srl(X,C))
When extracting a smaller integer from a scalar_to_vector source, the fold was previously limited to the element that holds the lowest bits of the scalar. This patch extends the fold to handle extraction of any other element that overlaps the scalar, by right-shifting the source before truncation. Fixes a regression from llvm#117884.
1 parent e98396f commit 249ed6f

File tree

5 files changed

+49
-63
lines changed

5 files changed

+49
-63
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23055,18 +23055,29 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
2305523055
// ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
2305623056
// trunc i64 X to i32
2305723057
SDValue X = BCSrc.getOperand(0);
23058-
assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
23058+
EVT XVT = X.getValueType();
23059+
assert(XVT.isScalarInteger() && ScalarVT.isScalarInteger() &&
2305923060
"Extract element and scalar to vector can't change element type "
2306023061
"from FP to integer.");
2306123062
unsigned XBitWidth = X.getValueSizeInBits();
23062-
BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
23063+
unsigned Scale = XBitWidth / VecEltBitWidth;
23064+
BCTruncElt = IsLE ? 0 : Scale - 1;
2306323065

2306423066
// An extract element return value type can be wider than its vector
2306523067
// operand element type. In that case, the high bits are undefined, so
2306623068
// it's possible that we may need to extend rather than truncate.
23067-
if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
23069+
if (ExtractIndex < Scale && XBitWidth > VecEltBitWidth) {
2306823070
assert(XBitWidth % VecEltBitWidth == 0 &&
2306923071
"Scalar bitwidth must be a multiple of vector element bitwidth");
23072+
23073+
if (ExtractIndex != BCTruncElt) {
23074+
unsigned ShiftIndex =
23075+
IsLE ? ExtractIndex : (Scale - 1) - ExtractIndex;
23076+
X = DAG.getNode(
23077+
ISD::SRL, DL, XVT, X,
23078+
DAG.getShiftAmountConstant(ShiftIndex * VecEltBitWidth, XVT, DL));
23079+
}
23080+
2307023081
return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
2307123082
}
2307223083
}

llvm/test/CodeGen/AArch64/extract-insert.ll

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
define i32 @trunc_i64_to_i32_le(i64 %x) {
66
; BE-LABEL: trunc_i64_to_i32_le:
77
; BE: // %bb.0:
8-
; BE-NEXT: fmov d0, x0
9-
; BE-NEXT: rev64 v0.4s, v0.4s
10-
; BE-NEXT: fmov w0, s0
8+
; BE-NEXT: lsr x0, x0, #32
9+
; BE-NEXT: // kill: def $w0 killed $w0 killed $x0
1110
; BE-NEXT: ret
1211
;
1312
; LE-LABEL: trunc_i64_to_i32_le:
@@ -28,8 +27,8 @@ define i32 @trunc_i64_to_i32_be(i64 %x) {
2827
;
2928
; LE-LABEL: trunc_i64_to_i32_be:
3029
; LE: // %bb.0:
31-
; LE-NEXT: fmov d0, x0
32-
; LE-NEXT: mov w0, v0.s[1]
30+
; LE-NEXT: lsr x0, x0, #32
31+
; LE-NEXT: // kill: def $w0 killed $w0 killed $x0
3332
; LE-NEXT: ret
3433
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
3534
%bc = bitcast <2 x i64> %ins to <4 x i32>
@@ -40,9 +39,8 @@ define i32 @trunc_i64_to_i32_be(i64 %x) {
4039
define i16 @trunc_i64_to_i16_le(i64 %x) {
4140
; BE-LABEL: trunc_i64_to_i16_le:
4241
; BE: // %bb.0:
43-
; BE-NEXT: fmov d0, x0
44-
; BE-NEXT: rev64 v0.8h, v0.8h
45-
; BE-NEXT: umov w0, v0.h[0]
42+
; BE-NEXT: lsr x0, x0, #48
43+
; BE-NEXT: // kill: def $w0 killed $w0 killed $x0
4644
; BE-NEXT: ret
4745
;
4846
; LE-LABEL: trunc_i64_to_i16_le:
@@ -63,8 +61,8 @@ define i16 @trunc_i64_to_i16_be(i64 %x) {
6361
;
6462
; LE-LABEL: trunc_i64_to_i16_be:
6563
; LE: // %bb.0:
66-
; LE-NEXT: fmov d0, x0
67-
; LE-NEXT: umov w0, v0.h[3]
64+
; LE-NEXT: lsr x0, x0, #48
65+
; LE-NEXT: // kill: def $w0 killed $w0 killed $x0
6866
; LE-NEXT: ret
6967
%ins = insertelement <2 x i64> undef, i64 %x, i32 0
7068
%bc = bitcast <2 x i64> %ins to <8 x i16>
@@ -75,9 +73,7 @@ define i16 @trunc_i64_to_i16_be(i64 %x) {
7573
define i8 @trunc_i32_to_i8_le(i32 %x) {
7674
; BE-LABEL: trunc_i32_to_i8_le:
7775
; BE: // %bb.0:
78-
; BE-NEXT: fmov s0, w0
79-
; BE-NEXT: rev32 v0.16b, v0.16b
80-
; BE-NEXT: umov w0, v0.b[0]
76+
; BE-NEXT: lsr w0, w0, #24
8177
; BE-NEXT: ret
8278
;
8379
; LE-LABEL: trunc_i32_to_i8_le:
@@ -96,8 +92,7 @@ define i8 @trunc_i32_to_i8_be(i32 %x) {
9692
;
9793
; LE-LABEL: trunc_i32_to_i8_be:
9894
; LE: // %bb.0:
99-
; LE-NEXT: fmov s0, w0
100-
; LE-NEXT: umov w0, v0.b[3]
95+
; LE-NEXT: lsr w0, w0, #24
10196
; LE-NEXT: ret
10297
%ins = insertelement <4 x i32> undef, i32 %x, i32 0
10398
%bc = bitcast <4 x i32> %ins to <16 x i8>
@@ -115,8 +110,8 @@ define i8 @trunc_i64_to_i8_be(i64 %x) {
115110
;
116111
; LE-LABEL: trunc_i64_to_i8_be:
117112
; LE: // %bb.0:
118-
; LE-NEXT: fmov d0, x0
119-
; LE-NEXT: umov w0, v0.b[7]
113+
; LE-NEXT: lsr x0, x0, #56
114+
; LE-NEXT: // kill: def $w0 killed $w0 killed $x0
120115
; LE-NEXT: ret
121116
%ins = insertelement <3 x i64> undef, i64 %x, i32 0
122117
%bc = bitcast <3 x i64> %ins to <24 x i8>

llvm/test/CodeGen/AArch64/vec3-loads-ext-trunc-stores.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,11 +358,10 @@ define void @store_trunc_from_64bits(ptr %src, ptr %dst) {
358358
; CHECK-NEXT: ldr w8, [x0]
359359
; CHECK-NEXT: add x9, x0, #4
360360
; CHECK-NEXT: ld1r.4h { v0 }, [x9]
361-
; CHECK-NEXT: fmov s1, w8
361+
; CHECK-NEXT: lsr w9, w8, #16
362362
; CHECK-NEXT: strb w8, [x1]
363-
; CHECK-NEXT: add x8, x1, #1
364-
; CHECK-NEXT: st1.b { v1 }[2], [x8]
365363
; CHECK-NEXT: add x8, x1, #2
364+
; CHECK-NEXT: strb w9, [x1, #1]
366365
; CHECK-NEXT: st1.b { v0 }[4], [x8]
367366
; CHECK-NEXT: ret
368367
;

llvm/test/CodeGen/PowerPC/scalar_vector_test_5.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,8 @@ define i8 @scalar_to_vector_half(ptr nocapture readonly %ad) {
1616
;
1717
; P9BE-LABEL: scalar_to_vector_half:
1818
; P9BE: # %bb.0: # %entry
19-
; P9BE-NEXT: lxsihzx v2, 0, r3
20-
; P9BE-NEXT: li r3, 0
21-
; P9BE-NEXT: vsplth v2, v2, 3
22-
; P9BE-NEXT: vextublx r3, r3, v2
19+
; P9BE-NEXT: lhz r3, 0(r3)
20+
; P9BE-NEXT: srwi r3, r3, 24
2321
; P9BE-NEXT: blr
2422
;
2523
; P8LE-LABEL: scalar_to_vector_half:
@@ -30,10 +28,7 @@ define i8 @scalar_to_vector_half(ptr nocapture readonly %ad) {
3028
; P8BE-LABEL: scalar_to_vector_half:
3129
; P8BE: # %bb.0: # %entry
3230
; P8BE-NEXT: lhz r3, 0(r3)
33-
; P8BE-NEXT: sldi r3, r3, 48
34-
; P8BE-NEXT: mtfprd f0, r3
35-
; P8BE-NEXT: mffprd r3, f0
36-
; P8BE-NEXT: rldicl r3, r3, 8, 56
31+
; P8BE-NEXT: srwi r3, r3, 24
3732
; P8BE-NEXT: blr
3833
entry:
3934
%0 = load <2 x i8>, ptr %ad, align 1

llvm/test/CodeGen/X86/load-partial.ll

Lines changed: 18 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
33
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
44
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
5-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
6-
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
77

88
;
99
; Partial Vector Loads - PR16739
@@ -382,38 +382,24 @@ define dso_local i32 @load_partial_illegal_type() {
382382
define dso_local void @PR43227(ptr %explicit_0, ptr %explicit_1) {
383383
; SSE-LABEL: PR43227:
384384
; SSE: # %bb.0:
385-
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
386-
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
387-
; SSE-NEXT: psrlq $32, %xmm0
388-
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
389-
; SSE-NEXT: pxor %xmm1, %xmm1
390-
; SSE-NEXT: movdqa %xmm1, 672(%rsi)
391-
; SSE-NEXT: movdqa %xmm0, 688(%rsi)
385+
; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
386+
; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
387+
; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
388+
; SSE-NEXT: xorps %xmm0, %xmm0
389+
; SSE-NEXT: movaps %xmm0, 672(%rsi)
390+
; SSE-NEXT: movaps %xmm1, 688(%rsi)
392391
; SSE-NEXT: retq
393392
;
394-
; AVX1-LABEL: PR43227:
395-
; AVX1: # %bb.0:
396-
; AVX1-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
397-
; AVX1-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
398-
; AVX1-NEXT: vpsrlq $32, %xmm0, %xmm0
399-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
400-
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
401-
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
402-
; AVX1-NEXT: vmovaps %ymm0, 672(%rsi)
403-
; AVX1-NEXT: vzeroupper
404-
; AVX1-NEXT: retq
405-
;
406-
; AVX2-LABEL: PR43227:
407-
; AVX2: # %bb.0:
408-
; AVX2-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
409-
; AVX2-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
410-
; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm0
411-
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
412-
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
413-
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
414-
; AVX2-NEXT: vmovdqa %ymm0, 672(%rsi)
415-
; AVX2-NEXT: vzeroupper
416-
; AVX2-NEXT: retq
393+
; AVX-LABEL: PR43227:
394+
; AVX: # %bb.0:
395+
; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
396+
; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
397+
; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
398+
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
399+
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
400+
; AVX-NEXT: vmovaps %ymm0, 672(%rsi)
401+
; AVX-NEXT: vzeroupper
402+
; AVX-NEXT: retq
417403
%1 = getelementptr i32, ptr %explicit_0, i64 63
418404
%2 = load <3 x i32>, ptr %1, align 1
419405
%3 = shufflevector <3 x i32> %2, <3 x i32> undef, <2 x i32> <i32 1, i32 2>

0 commit comments

Comments
 (0)