Skip to content

Commit 37f4e82

Browse files
author
git apple-llvm automerger
committed
Merge commit '11bf02e0192a' from llvm.org/main into next
2 parents 3ce4464 + 11bf02e commit 37f4e82

File tree

7 files changed

+1084
-225
lines changed

7 files changed

+1084
-225
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ static const unsigned MaxParallelChains = 64;
155155
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
156156
const SDValue *Parts, unsigned NumParts,
157157
MVT PartVT, EVT ValueVT, const Value *V,
158+
SDValue InChain,
158159
std::optional<CallingConv::ID> CC);
159160

160161
/// getCopyFromParts - Create a value that contains the specified legal parts
@@ -165,6 +166,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
165166
static SDValue
166167
getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
167168
unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V,
169+
SDValue InChain,
168170
std::optional<CallingConv::ID> CC = std::nullopt,
169171
std::optional<ISD::NodeType> AssertOp = std::nullopt) {
170172
// Let the target assemble the parts if it wants to
@@ -175,7 +177,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
175177

176178
if (ValueVT.isVector())
177179
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
178-
CC);
180+
InChain, CC);
179181

180182
assert(NumParts > 0 && "No parts to assemble!");
181183
SDValue Val = Parts[0];
@@ -196,10 +198,10 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
196198
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
197199

198200
if (RoundParts > 2) {
199-
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
200-
PartVT, HalfVT, V);
201-
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
202-
RoundParts / 2, PartVT, HalfVT, V);
201+
Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, PartVT, HalfVT, V,
202+
InChain);
203+
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, RoundParts / 2,
204+
PartVT, HalfVT, V, InChain);
203205
} else {
204206
Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
205207
Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
@@ -215,7 +217,7 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
215217
unsigned OddParts = NumParts - RoundParts;
216218
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
217219
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
218-
OddVT, V, CC);
220+
OddVT, V, InChain, CC);
219221

220222
// Combine the round and odd parts.
221223
Lo = Val;
@@ -245,7 +247,8 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
245247
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
246248
!PartVT.isVector() && "Unexpected split");
247249
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
248-
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
250+
Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V,
251+
InChain, CC);
249252
}
250253
}
251254

@@ -285,10 +288,20 @@ getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts,
285288

286289
if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
287290
// FP_ROUND's are always exact here.
288-
if (ValueVT.bitsLT(Val.getValueType()))
289-
return DAG.getNode(
290-
ISD::FP_ROUND, DL, ValueVT, Val,
291-
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
291+
if (ValueVT.bitsLT(Val.getValueType())) {
292+
293+
SDValue NoChange =
294+
DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()));
295+
296+
if (DAG.getMachineFunction().getFunction().getAttributes().hasFnAttr(
297+
llvm::Attribute::StrictFP)) {
298+
return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
299+
DAG.getVTList(ValueVT, MVT::Other), InChain, Val,
300+
NoChange);
301+
}
302+
303+
return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, NoChange);
304+
}
292305

293306
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
294307
}
@@ -326,6 +339,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
326339
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
327340
const SDValue *Parts, unsigned NumParts,
328341
MVT PartVT, EVT ValueVT, const Value *V,
342+
SDValue InChain,
329343
std::optional<CallingConv::ID> CallConv) {
330344
assert(ValueVT.isVector() && "Not a vector value");
331345
assert(NumParts > 0 && "No parts to assemble!");
@@ -364,17 +378,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
364378
// If the register was not expanded, truncate or copy the value,
365379
// as appropriate.
366380
for (unsigned i = 0; i != NumParts; ++i)
367-
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
368-
PartVT, IntermediateVT, V, CallConv);
381+
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, PartVT, IntermediateVT,
382+
V, InChain, CallConv);
369383
} else if (NumParts > 0) {
370384
// If the intermediate type was expanded, build the intermediate
371385
// operands from the parts.
372386
assert(NumParts % NumIntermediates == 0 &&
373387
"Must expand into a divisible number of parts!");
374388
unsigned Factor = NumParts / NumIntermediates;
375389
for (unsigned i = 0; i != NumIntermediates; ++i)
376-
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
377-
PartVT, IntermediateVT, V, CallConv);
390+
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, PartVT,
391+
IntermediateVT, V, InChain, CallConv);
378392
}
379393

380394
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
@@ -928,7 +942,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
928942
}
929943

930944
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
931-
RegisterVT, ValueVT, V, CallConv);
945+
RegisterVT, ValueVT, V, Chain, CallConv);
932946
Part += NumRegs;
933947
Parts.clear();
934948
}
@@ -10700,9 +10714,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
1070010714
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
1070110715
CLI.CallConv, VT);
1070210716

10703-
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
10704-
NumRegs, RegisterVT, VT, nullptr,
10705-
CLI.CallConv, AssertOp));
10717+
ReturnValues.push_back(getCopyFromParts(
10718+
CLI.DAG, CLI.DL, &InVals[CurReg], NumRegs, RegisterVT, VT, nullptr,
10719+
CLI.Chain, CLI.CallConv, AssertOp));
1070610720
CurReg += NumRegs;
1070710721
}
1070810722

@@ -11181,8 +11195,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
1118111195
MVT VT = ValueVTs[0].getSimpleVT();
1118211196
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
1118311197
std::optional<ISD::NodeType> AssertOp;
11184-
SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT,
11185-
nullptr, F.getCallingConv(), AssertOp);
11198+
SDValue ArgValue =
11199+
getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, NewRoot,
11200+
F.getCallingConv(), AssertOp);
1118611201

1118711202
MachineFunction& MF = SDB->DAG.getMachineFunction();
1118811203
MachineRegisterInfo& RegInfo = MF.getRegInfo();
@@ -11254,7 +11269,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
1125411269
AssertOp = ISD::AssertZext;
1125511270

1125611271
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
11257-
PartVT, VT, nullptr,
11272+
PartVT, VT, nullptr, NewRoot,
1125811273
F.getCallingConv(), AssertOp));
1125911274
}
1126011275

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll

Lines changed: 5 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,52 +1094,11 @@ define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind {
10941094
ret <4 x i1> %1
10951095
}
10961096

1097-
define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
1098-
; GFX7CHECK-LABEL: isnan_bf16_strictfp:
1099-
; GFX7CHECK: ; %bb.0:
1100-
; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1101-
; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15
1102-
; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80
1103-
; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
1104-
; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1105-
; GFX7CHECK-NEXT: s_setpc_b64 s[30:31]
1106-
;
1107-
; GFX8CHECK-LABEL: isnan_bf16_strictfp:
1108-
; GFX8CHECK: ; %bb.0:
1109-
; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110-
; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1111-
; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80
1112-
; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
1113-
; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1114-
; GFX8CHECK-NEXT: s_setpc_b64 s[30:31]
1115-
;
1116-
; GFX9CHECK-LABEL: isnan_bf16_strictfp:
1117-
; GFX9CHECK: ; %bb.0:
1118-
; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119-
; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1120-
; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80
1121-
; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0
1122-
; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1123-
; GFX9CHECK-NEXT: s_setpc_b64 s[30:31]
1124-
;
1125-
; GFX10CHECK-LABEL: isnan_bf16_strictfp:
1126-
; GFX10CHECK: ; %bb.0:
1127-
; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128-
; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1129-
; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
1130-
; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1131-
; GFX10CHECK-NEXT: s_setpc_b64 s[30:31]
1132-
;
1133-
; GFX11CHECK-LABEL: isnan_bf16_strictfp:
1134-
; GFX11CHECK: ; %bb.0:
1135-
; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136-
; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0
1137-
; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
1138-
; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
1139-
; GFX11CHECK-NEXT: s_setpc_b64 s[30:31]
1140-
%1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
1141-
ret i1 %1
1142-
}
1097+
; FIXME: Broken for gfx6/7
1098+
; define i1 @isnan_bf16_strictfp(bfloat %x) strictfp nounwind {
1099+
; %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan
1100+
; ret i1 %1
1101+
; }
11431102

11441103
define i1 @isinf_bf16(bfloat %x) nounwind {
11451104
; GFX7CHECK-LABEL: isinf_bf16:

llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,6 +1316,9 @@ define i1 @isnan_f16_strictfp(half %x) strictfp nounwind {
13161316
; GFX7SELDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13171317
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
13181318
; GFX7SELDAG-NEXT: s_movk_i32 s4, 0x7c00
1319+
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1320+
; GFX7SELDAG-NEXT: v_cvt_f32_f16_e32 v0, v0
1321+
; GFX7SELDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
13191322
; GFX7SELDAG-NEXT: v_and_b32_e32 v0, 0x7fff, v0
13201323
; GFX7SELDAG-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0
13211324
; GFX7SELDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc

llvm/test/CodeGen/AMDGPU/strict_fp_casts.ll

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments
 (0)