Skip to content

Commit d8ca293

Browse files
committed
Add and use WidenVecRes_UnaryOpWithTwoResults for SINCOS + FREXP
1 parent 5a1da25 commit d8ca293

File tree

3 files changed

+143
-8
lines changed

3 files changed

+143
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1072,7 +1072,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
10721072
SDValue WidenVecRes_ExpOp(SDNode *N);
10731073
SDValue WidenVecRes_Unary(SDNode *N);
10741074
SDValue WidenVecRes_InregOp(SDNode *N);
1075-
SDValue WidenVecRes_FSINCOS(SDNode *N);
1075+
// Widens result ResNo of a unary node that produces two vector results
// (dispatched for ISD::FSINCOS and ISD::FFREXP) and returns the widened value.
SDValue WidenVecRes_UnaryOpWithTwoResults(SDNode *N, unsigned ResNo);
1076+
// For a two-result node N whose result WidenResNo is being widened into
// WidenNode, maps N's other result onto WidenNode's matching result.
void ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode,
1077+
unsigned WidenResNo);
10761078

10771079
// Widen Vector Operand.
10781080
bool WidenVectorOperand(SDNode *N, unsigned OpNo);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4435,6 +4435,22 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
44354435
// Result Vector Widening
44364436
//===----------------------------------------------------------------------===//
44374437

4438+
/// Handle the result of a two-result node that is NOT the one currently being
/// widened.
///
/// \p N is the original node, \p WidenNode the node that produces the wide
/// results, and \p WidenResNo the index of the result the caller is widening.
/// The other result is either recorded as widened (when its type action is
/// TypeWidenVector) or replaced by a subvector extracted from the wide value.
void DAGTypeLegalizer::ReplaceOtherWidenResult(SDNode *N, SDNode *WidenNode,
4439+
unsigned WidenResNo) {
4440+
assert(N->getNumValues() == 2 && "expected node with two results");
4441+
// With exactly two results (indices 0 and 1) the other index is 1 - this one.
unsigned OtherNo = 1 - WidenResNo;
4442+
EVT OtherVT = N->getValueType(OtherNo);
4443+
if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
4444+
// The other result is widened as well: register WidenNode's matching result
// as its widened vector.
SetWidenedVector(SDValue(N, OtherNo), SDValue(WidenNode, OtherNo));
4445+
} else {
4446+
SDLoc DL(N);
4447+
// Otherwise take a subvector of the original type, starting at index 0, out
// of WidenNode's matching result and substitute it for the original value.
SDValue OtherVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OtherVT,
4448+
SDValue(WidenNode, OtherNo),
4449+
DAG.getVectorIdxConstant(0, DL));
4450+
ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
4451+
}
4452+
}
4453+
44384454
void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
44394455
LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
44404456

@@ -4454,6 +4470,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
44544470
if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
44554471
TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
44564472
Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
4473+
if (N->getNumValues() == 2)
4474+
ReplaceOtherWidenResult(N, Res.getNode(), ResNo);
44574475
return true;
44584476
}
44594477
return false;
@@ -4758,12 +4776,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
47584776
case ISD::VP_FSHR:
47594777
Res = WidenVecRes_Ternary(N);
47604778
break;
4779+
case ISD::FFREXP:
47614780
case ISD::FSINCOS: {
47624781
if (!unrollExpandedOp())
4763-
Res = WidenVecRes_FSINCOS(N);
4764-
for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++)
4765-
SetWidenedVector(SDValue(N, ResNum), Res.getValue(ResNum));
4766-
Res = SDValue();
4782+
Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
47674783
break;
47684784
}
47694785
}
@@ -5514,10 +5530,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
55145530
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
55155531
}
55165532

5517-
/// Widen result \p ResNo of a unary node \p N that has two vector results.
///
/// Both wide result types use the element count of the widened type of result
/// \p ResNo while keeping their own element types. The former
/// WidenVecRes_FSINCOS used one widened type (derived from result 0) for both
/// results.
// NOTE(review): this span is a rendered diff; a gutter line ending in '-'
// precedes a removed WidenVecRes_FSINCOS line, one ending in '+' precedes an
// added line.
SDValue DAGTypeLegalizer::WidenVecRes_FSINCOS(SDNode *N) {
5518-
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5533+
SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N,
5534+
unsigned ResNo) {
5535+
LLVMContext &Ctx = *DAG.getContext();
55195536
SDValue InOp = GetWidenedVector(N->getOperand(0));
5520-
return DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT, WidenVT}, InOp);
5537+
5538+
// The widened type of the requested result fixes the element count used for
// both wide results.
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo));
5539+
ElementCount WidenEC = WidenVT.getVectorElementCount();
5540+
5541+
EVT VT0 = N->getValueType(0);
5542+
EVT VT1 = N->getValueType(1);
5543+
5544+
// Each result keeps its own element type, so the two results may differ.
EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC);
5545+
EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC);
5546+
5547+
SDNode *WidenNode =
5548+
DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp)
5549+
.getNode();
5550+
5551+
// Deal with the result that is not ResNo here; the requested result is
// returned to the caller.
ReplaceOtherWidenResult(N, WidenNode, ResNo);
5552+
return SDValue(WidenNode, ResNo);
55215553
}
55225554

55235555
SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
3+
4+
; Verifies AArch64 codegen for llvm.frexp on a <2 x half> operand with a
; <2 x i32> exponent result; the expected output makes four frexpf libcalls.
define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) {
5+
; CHECK-LABEL: test_frexp_v2f16_v2i32:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: sub sp, sp, #64
8+
; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
9+
; CHECK-NEXT: .cfi_def_cfa_offset 64
10+
; CHECK-NEXT: .cfi_offset w19, -8
11+
; CHECK-NEXT: .cfi_offset w30, -16
12+
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
13+
; CHECK-NEXT: mov h1, v0.h[1]
14+
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
15+
; CHECK-NEXT: add x0, sp, #36
16+
; CHECK-NEXT: add x19, sp, #36
17+
; CHECK-NEXT: fcvt s0, h1
18+
; CHECK-NEXT: bl frexpf
19+
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
20+
; CHECK-NEXT: fcvt h0, s0
21+
; CHECK-NEXT: add x0, sp, #32
22+
; CHECK-NEXT: fcvt s1, h1
23+
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
24+
; CHECK-NEXT: fmov s0, s1
25+
; CHECK-NEXT: bl frexpf
26+
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
27+
; CHECK-NEXT: fcvt h2, s0
28+
; CHECK-NEXT: add x0, sp, #40
29+
; CHECK-NEXT: mov h1, v1.h[2]
30+
; CHECK-NEXT: fcvt s0, h1
31+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
32+
; CHECK-NEXT: mov v2.h[1], v1.h[0]
33+
; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill
34+
; CHECK-NEXT: bl frexpf
35+
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
36+
; CHECK-NEXT: fcvt h2, s0
37+
; CHECK-NEXT: add x0, sp, #44
38+
; CHECK-NEXT: mov h1, v1.h[3]
39+
; CHECK-NEXT: fcvt s0, h1
40+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
41+
; CHECK-NEXT: mov v1.h[2], v2.h[0]
42+
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
43+
; CHECK-NEXT: bl frexpf
44+
; CHECK-NEXT: fcvt h2, s0
45+
; CHECK-NEXT: ldr s1, [sp, #32]
46+
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
47+
; CHECK-NEXT: ld1 { v1.s }[1], [x19]
48+
; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
49+
; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
50+
; CHECK-NEXT: mov v0.h[3], v2.h[0]
51+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
52+
; CHECK-NEXT: add sp, sp, #64
53+
; CHECK-NEXT: ret
54+
%result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a)
55+
ret { <2 x half>, <2 x i32> } %result
56+
}
57+
58+
; Verifies AArch64 codegen for llvm.frexp on a <3 x float> operand with a
; <3 x i32> exponent result; the expected output makes three frexpf libcalls.
; The function is named v3f32 (not v3f16) to match its <3 x float> operand, and
; the intrinsic uses the f32 type suffix for float.
define { <3 x float>, <3 x i32> } @test_frexp_v3f32_v3i32(<3 x float> %a) {
59+
; CHECK-LABEL: test_frexp_v3f32_v3i32:
60+
; CHECK: // %bb.0:
61+
; CHECK-NEXT: sub sp, sp, #80
62+
; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
63+
; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
64+
; CHECK-NEXT: .cfi_def_cfa_offset 80
65+
; CHECK-NEXT: .cfi_offset w19, -8
66+
; CHECK-NEXT: .cfi_offset w20, -16
67+
; CHECK-NEXT: .cfi_offset w30, -32
68+
; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
69+
; CHECK-NEXT: mov s0, v0.s[1]
70+
; CHECK-NEXT: add x0, sp, #56
71+
; CHECK-NEXT: add x19, sp, #56
72+
; CHECK-NEXT: bl frexpf
73+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
74+
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
75+
; CHECK-NEXT: add x0, sp, #44
76+
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
77+
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
78+
; CHECK-NEXT: bl frexpf
79+
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
80+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
81+
; CHECK-NEXT: add x0, sp, #60
82+
; CHECK-NEXT: add x20, sp, #60
83+
; CHECK-NEXT: mov v0.s[1], v1.s[0]
84+
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
85+
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
86+
; CHECK-NEXT: mov s0, v0.s[2]
87+
; CHECK-NEXT: bl frexpf
88+
; CHECK-NEXT: ldr s1, [sp, #44]
89+
; CHECK-NEXT: ldr q2, [sp] // 16-byte Folded Reload
90+
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
91+
; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
92+
; CHECK-NEXT: ld1 { v1.s }[1], [x19]
93+
; CHECK-NEXT: mov v2.s[2], v0.s[0]
94+
; CHECK-NEXT: ld1 { v1.s }[2], [x20]
95+
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
96+
; CHECK-NEXT: mov v0.16b, v2.16b
97+
; CHECK-NEXT: add sp, sp, #80
98+
; CHECK-NEXT: ret
99+
%result = call { <3 x float>, <3 x i32> } @llvm.frexp.v3f32.v3i32(<3 x float> %a)
100+
ret { <3 x float>, <3 x i32> } %result
101+
}

0 commit comments

Comments
 (0)