Skip to content

Commit e15c4e3

Browse files
committed
Fixups
1 parent bf8f0d9 commit e15c4e3

File tree

2 files changed

+60
-21
lines changed

2 files changed

+60
-21
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2353,29 +2353,23 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
23532353
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
23542354
RTLIB::Libcall LC = RTLIB::getFSINCOS(VT);
23552355

2356-
// Find users of the node that store the results. The destination pointers
2357-
// can be used instead of creating stack allocations.
2356+
// Find users of the node that store the results (and share the same input
2357+
// chain). The destination pointers can be used instead of stack allocations.
2358+
SDValue StoresInChain{};
23582359
std::array<StoreSDNode *, 2> ResultStores = {nullptr};
23592360
for (SDNode *User : Node->uses()) {
23602361
if (!ISD::isNormalStore(User))
23612362
continue;
23622363
auto *ST = cast<StoreSDNode>(User);
23632364
if (!ST->isSimple() || ST->getAddressSpace() != 0 ||
2364-
ST->getAlign() < DAG.getDataLayout().getABITypeAlign(Ty))
2365+
ST->getAlign() < DAG.getDataLayout().getABITypeAlign(Ty) ||
2366+
(StoresInChain && ST->getChain() != StoresInChain) ||
2367+
Node->isPredecessorOf(ST->getChain().getNode()))
23652368
continue;
23662369
ResultStores[ST->getValue().getResNo()] = ST;
2370+
StoresInChain = ST->getChain();
23672371
}
23682372

2369-
// Collect input chains (and avoid chains referring to one of the stores).
2370-
SmallVector<SDValue, 2> InChains;
2371-
for (auto [ResNum, ST] : llvm::enumerate(ResultStores)) {
2372-
unsigned OtherResNum = ResNum == 0 ? 1 : 0;
2373-
if (ST && ST->getChain().getNode() != ResultStores[OtherResNum])
2374-
InChains.push_back(ST->getChain());
2375-
}
2376-
if (InChains.empty())
2377-
InChains.push_back(DAG.getEntryNode());
2378-
23792373
TargetLowering::ArgListTy Args;
23802374
TargetLowering::ArgListEntry Entry{};
23812375

@@ -2395,9 +2389,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(
23952389
}
23962390

23972391
SDLoc DL(Node);
2398-
2399-
// Combine any input chains from the stores.
2400-
SDValue InChain = DAG.getTokenFactor(DL, InChains);
2392+
SDValue InChain = StoresInChain ? StoresInChain : DAG.getEntryNode();
24012393
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
24022394
TLI.getPointerTy(DAG.getDataLayout()));
24032395
TargetLowering::CallLoweringInfo CLI(DAG);

llvm/test/CodeGen/AArch64/sincos-stack-slots.ll

Lines changed: 52 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ entry:
2323
ret { float, float } %ret_1
2424
}
2525

26-
define void @sincos_f32_ptr_return(float %x, ptr %out_sin, ptr %out_cos) {
26+
define void @sincos_f32_ptr_return(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
2727
; CHECK-LABEL: sincos_f32_ptr_return:
2828
; CHECK: // %bb.0: // %entry
2929
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -81,7 +81,7 @@ entry:
8181
ret { double, double } %ret_1
8282
}
8383

84-
define void @sincos_f64_ptr_return(double %x, ptr %out_sin, ptr %out_cos) {
84+
define void @sincos_f64_ptr_return(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
8585
; CHECK-LABEL: sincos_f64_ptr_return:
8686
; CHECK: // %bb.0: // %entry
8787
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -116,8 +116,55 @@ entry:
116116
ret double %cos
117117
}
118118

119+
; Here %out_sin and %out_cos may alias, so we can't fold both stores into the
120+
; call to sincosf (as the order of the stores within sincosf is not defined).
121+
define void @sincos_may_alias(float %x, ptr %out_sin, ptr %out_cos) {
122+
; CHECK-LABEL: sincos_may_alias:
123+
; CHECK: // %bb.0: // %entry
124+
; CHECK-NEXT: sub sp, sp, #32
125+
; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
126+
; CHECK-NEXT: .cfi_def_cfa_offset 32
127+
; CHECK-NEXT: .cfi_offset w19, -8
128+
; CHECK-NEXT: .cfi_offset w30, -16
129+
; CHECK-NEXT: mov x19, x1
130+
; CHECK-NEXT: add x1, sp, #12
131+
; CHECK-NEXT: bl sincosf
132+
; CHECK-NEXT: ldr s0, [sp, #12]
133+
; CHECK-NEXT: str s0, [x19]
134+
; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
135+
; CHECK-NEXT: add sp, sp, #32
136+
; CHECK-NEXT: ret
137+
entry:
138+
%sin = tail call float @llvm.sin.f32(float %x)
139+
%cos = tail call float @llvm.cos.f32(float %x)
140+
store float %sin, ptr %out_sin, align 4
141+
store float %cos, ptr %out_cos, align 4
142+
ret void
143+
}
144+
145+
; Here %out is used for both sin and cos (with the final value stored being cos).
146+
define float @sincos_multiple_uses(float %x, ptr %out) {
147+
; CHECK-LABEL: sincos_multiple_uses:
148+
; CHECK: // %bb.0:
149+
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
150+
; CHECK-NEXT: .cfi_def_cfa_offset 16
151+
; CHECK-NEXT: .cfi_offset w30, -16
152+
; CHECK-NEXT: mov x1, x0
153+
; CHECK-NEXT: add x0, sp, #12
154+
; CHECK-NEXT: bl sincosf
155+
; CHECK-NEXT: ldr s0, [sp, #12]
156+
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
157+
; CHECK-NEXT: ret
158+
%sin = call float @llvm.sin.f32(float %x)
159+
store float %sin, ptr %out, align 4
160+
%reload = load float, ptr %out, align 4
161+
%cos = call float @llvm.cos.f32(float %x)
162+
store float %cos, ptr %out, align 4
163+
ret float %reload
164+
}
165+
119166
; Negative test. We can't fold volatile stores into the library call.
120-
define void @sincos_volatile_result_stores(float %x, ptr %out_sin, ptr %out_cos) {
167+
define void @sincos_volatile_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
121168
; CHECK-LABEL: sincos_volatile_result_stores:
122169
; CHECK: // %bb.0: // %entry
123170
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
@@ -146,7 +193,7 @@ entry:
146193
}
147194

148195
; Negative test. We can't fold atomic stores into the library call.
149-
define void @sincos_atomic_result_stores(float %x, ptr %out_sin, ptr %out_cos) {
196+
define void @sincos_atomic_result_stores(float %x, ptr noalias %out_sin, ptr noalias %out_cos) {
150197
; CHECK-LABEL: sincos_atomic_result_stores:
151198
; CHECK: // %bb.0: // %entry
152199
; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
@@ -176,7 +223,7 @@ entry:
176223
}
177224

178225
; Negative test. We can't fold misaligned stores into the library call.
179-
define void @sincos_misaligned_result_stores(double %x, ptr %out_sin, ptr %out_cos) {
226+
define void @sincos_misaligned_result_stores(double %x, ptr noalias %out_sin, ptr noalias %out_cos) {
180227
; CHECK-LABEL: sincos_misaligned_result_stores:
181228
; CHECK: // %bb.0: // %entry
182229
; CHECK-NEXT: sub sp, sp, #48

0 commit comments

Comments
 (0)