Skip to content

Commit de775f2

Browse files
committed
[DAG] Add constant SPLAT_VECTOR handling to getNode's SIGN_EXTEND_INREG case
This helps simplify constant splats a little. Without this, the combine in llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp#L14072 always returns the existing node. Differential Revision: https://reviews.llvm.org/D157259
1 parent 5bd8f48 commit de775f2

File tree

3 files changed

+50
-7
lines changed

3 files changed

+50
-7
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6592,6 +6592,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
65926592
}
65936593
return getBuildVector(VT, DL, Ops);
65946594
}
6595+
6596+
if (N1.getOpcode() == ISD::SPLAT_VECTOR &&
6597+
isa<ConstantSDNode>(N1.getOperand(0)))
6598+
return getNode(
6599+
ISD::SPLAT_VECTOR, DL, VT,
6600+
SignExtendInReg(N1.getConstantOperandAPInt(0),
6601+
N1.getOperand(0).getValueType()));
65956602
break;
65966603
}
65976604
case ISD::FP_TO_SINT_SAT:

llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,17 @@ define i64 @scalable_int_min_max(ptr %arg, ptr %arg1, <vscale x 2 x ptr> %i37, <
66
; CHECK: // %bb.0: // %entry
77
; CHECK-NEXT: ptrue p0.d
88
; CHECK-NEXT: mov w8, #3745 // =0xea1
9-
; CHECK-NEXT: movk w8, #16618, lsl #16
109
; CHECK-NEXT: ld1w { z3.d }, p0/z, [x0]
10+
; CHECK-NEXT: movk w8, #16618, lsl #16
1111
; CHECK-NEXT: mov w9, #57344 // =0xe000
12-
; CHECK-NEXT: mov z6.d, #1023 // =0x3ff
1312
; CHECK-NEXT: movk w9, #17535, lsl #16
1413
; CHECK-NEXT: mov z4.s, w8
1514
; CHECK-NEXT: fmul z4.s, p0/m, z4.s, z3.s
1615
; CHECK-NEXT: mov z5.s, w9
1716
; CHECK-NEXT: fadd z4.s, p0/m, z4.s, z5.s
18-
; CHECK-NEXT: mov z5.d, #0 // =0x0
17+
; CHECK-NEXT: mov z5.d, #1023 // =0x3ff
1918
; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s
20-
; CHECK-NEXT: sxtw z5.d, p0/m, z5.d
21-
; CHECK-NEXT: smax z4.d, p0/m, z4.d, z5.d
22-
; CHECK-NEXT: movprfx z5, z6
23-
; CHECK-NEXT: sxtw z5.d, p0/m, z6.d
19+
; CHECK-NEXT: smax z4.d, z4.d, #0
2420
; CHECK-NEXT: smin z4.d, p0/m, z4.d, z5.d
2521
; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
2622
; CHECK-NEXT: ld1w { z4.d }, p1/z, [x1]
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -O2 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3+
4+
define <vscale x 8 x i16> @sext_splat_v8i16_128() {
5+
; CHECK-LABEL: sext_splat_v8i16_128:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: mov z0.h, #-128 // =0xffffffffffffff80
8+
; CHECK-NEXT: ret
9+
%i = insertelement <vscale x 8 x i16> poison, i16 128, i32 0
10+
%s = shufflevector <vscale x 8 x i16> %i, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
11+
%a = shl <vscale x 8 x i16> %s, shufflevector (<vscale x 8 x i16> insertelement(<vscale x 8 x i16> undef, i16 8, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
12+
%b = ashr <vscale x 8 x i16> %a, shufflevector (<vscale x 8 x i16> insertelement(<vscale x 8 x i16> undef, i16 8, i32 0), <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer)
13+
ret <vscale x 8 x i16> %b
14+
}
15+
16+
define <vscale x 8 x i1> @sext_icmp_splat_v8i16_128(<vscale x 8 x i8> %d) {
17+
; CHECK-LABEL: sext_icmp_splat_v8i16_128:
18+
; CHECK: // %bb.0:
19+
; CHECK-NEXT: ptrue p0.h
20+
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
21+
; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, #-1
22+
; CHECK-NEXT: ret
23+
%i = insertelement <vscale x 8 x i8> poison, i8 128, i32 0
24+
%s = shufflevector <vscale x 8 x i8> %i, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
25+
%c = icmp ugt <vscale x 8 x i8> %s, %d
26+
ret <vscale x 8 x i1> %c
27+
}
28+
29+
define <vscale x 4 x i1> @sext_icmp_splat_v4i16_128(<vscale x 4 x i8> %d) {
30+
; CHECK-LABEL: sext_icmp_splat_v4i16_128:
31+
; CHECK: // %bb.0:
32+
; CHECK-NEXT: ptrue p0.s
33+
; CHECK-NEXT: sxtb z0.s, p0/m, z0.s
34+
; CHECK-NEXT: cmpgt p0.s, p0/z, z0.s, #-1
35+
; CHECK-NEXT: ret
36+
%i = insertelement <vscale x 4 x i8> poison, i8 128, i32 0
37+
%s = shufflevector <vscale x 4 x i8> %i, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
38+
%c = icmp ugt <vscale x 4 x i8> %s, %d
39+
ret <vscale x 4 x i1> %c
40+
}

0 commit comments

Comments
 (0)