Skip to content

Commit 0d493ed

Browse files
committed
Revert 4 last AMDGPU commits to unbreak Windows bots
Revert "AMDGPU: Try to fix build error with old gcc"
This reverts commit c7ad12d.

Revert "AMDGPU: Use umin in set.rounding expansion"
This reverts commit a56f0b5.

Revert "AMDGPU: Optimize set_rounding if input is known to fit in 2 bits (#88588)"
This reverts commit b4e751e.

Revert "AMDGPU: Implement llvm.set.rounding (#88587)"
This reverts commit 9731b77.
1 parent a98a6e9 commit 0d493ed

File tree

8 files changed

+0
-1890
lines changed

8 files changed

+0
-1890
lines changed

llvm/docs/AMDGPUUsage.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1157,12 +1157,6 @@ The AMDGPU backend implements the following LLVM IR intrinsics.
11571157
register do not exactly match the FLT_ROUNDS values,
11581158
so a conversion is performed.
11591159

1160-
:ref:`llvm.set.rounding<int_set_rounding>` Input value expected to be one of the valid results
1161-
from '``llvm.get.rounding``'. Rounding mode is
1162-
undefined if not passed a valid input. This should be
1163-
a wave uniform value. In case of a divergent input
1164-
value, the first active lane's value will be used.
1165-
11661160
:ref:`llvm.get.fpenv<int_get_fpenv>` Returns the current value of the AMDGPU floating point environment.
11671161
This stores information related to the current rounding mode,
11681162
denormalization mode, enabled traps, and floating point exceptions.

llvm/docs/LangRef.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26739,8 +26739,6 @@ specified by C standard:
2673926739
Other values may be used to represent additional rounding modes, supported by a
2674026740
target. These values are target-specific.
2674126741

26742-
.. _int_set_rounding:
26743-
2674426742
'``llvm.set.rounding``' Intrinsic
2674526743
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2674626744

llvm/docs/ReleaseNotes.rst

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,6 @@ Changes to the AMDGPU Backend
8181

8282
* Implemented the ``llvm.get.fpenv`` and ``llvm.set.fpenv`` intrinsics.
8383

84-
* Implemented :ref:`llvm.get.rounding <int_get_rounding>` and :ref:`llvm.set.rounding <int_set_rounding>`
85-
8684
Changes to the ARM Backend
8785
--------------------------
8886
* FEAT_F32MM is no longer activated by default when using `+sve` on v8.6-A or greater. The feature is still available and can be used by adding `+f32mm` to the command line options.

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 0 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
877877

878878
setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
879879
setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
880-
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
881880
setOperationAction(ISD::GET_FPENV, MVT::i64, Custom);
882881
setOperationAction(ISD::SET_FPENV, MVT::i64, Custom);
883882

@@ -4060,91 +4059,6 @@ SDValue SITargetLowering::lowerGET_ROUNDING(SDValue Op,
40604059
return DAG.getMergeValues({Result, GetReg.getValue(1)}, SL);
40614060
}
40624061

4063-
SDValue SITargetLowering::lowerSET_ROUNDING(SDValue Op,
4064-
SelectionDAG &DAG) const {
4065-
SDLoc SL(Op);
4066-
4067-
SDValue NewMode = Op.getOperand(1);
4068-
assert(NewMode.getValueType() == MVT::i32);
4069-
4070-
// Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the
4071-
// hardware MODE.fp_round values.
4072-
if (auto *ConstMode = dyn_cast<ConstantSDNode>(NewMode)) {
4073-
uint32_t ClampedVal = std::min(
4074-
static_cast<uint32_t>(ConstMode->getZExtValue()),
4075-
static_cast<uint32_t>(AMDGPU::TowardZeroF32_TowardNegativeF64));
4076-
NewMode = DAG.getConstant(
4077-
AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal), SL, MVT::i32);
4078-
} else {
4079-
// If we know the input can only be one of the supported standard modes in
4080-
// the range 0-3, we can use a simplified mapping to hardware values.
4081-
KnownBits KB = DAG.computeKnownBits(NewMode);
4082-
const bool UseReducedTable = KB.countMinLeadingZeros() >= 30;
4083-
// The supported standard values are 0-3. The extended values start at 8. We
4084-
// need to offset by 4 if the value is in the extended range.
4085-
4086-
if (UseReducedTable) {
4087-
// Keep only the low 16 bits of the table (the four 4-bit entries for modes 0-3) as an i32.
4088-
SDValue BitTable = DAG.getConstant(
4089-
AMDGPU::FltRoundToHWConversionTable & 0xffff, SL, MVT::i32);
4090-
4091-
SDValue Two = DAG.getConstant(2, SL, MVT::i32);
4092-
SDValue RoundModeTimesNumBits =
4093-
DAG.getNode(ISD::SHL, SL, MVT::i32, NewMode, Two);
4094-
4095-
NewMode =
4096-
DAG.getNode(ISD::SRL, SL, MVT::i32, BitTable, RoundModeTimesNumBits);
4097-
4098-
// TODO: SimplifyDemandedBits on the setreg source here can likely reduce
4099-
// the table extracted bits into inline immediates.
4100-
} else {
4101-
// table_index = umin(value, value - 4)
4102-
// MODE.fp_round = (bit_table >> (table_index << 2)) & 0xf
4103-
SDValue BitTable =
4104-
DAG.getConstant(AMDGPU::FltRoundToHWConversionTable, SL, MVT::i64);
4105-
4106-
SDValue Four = DAG.getConstant(4, SL, MVT::i32);
4107-
SDValue OffsetEnum = DAG.getNode(ISD::SUB, SL, MVT::i32, NewMode, Four);
4108-
SDValue IndexVal =
4109-
DAG.getNode(ISD::UMIN, SL, MVT::i32, NewMode, OffsetEnum);
4110-
4111-
SDValue Two = DAG.getConstant(2, SL, MVT::i32);
4112-
SDValue RoundModeTimesNumBits =
4113-
DAG.getNode(ISD::SHL, SL, MVT::i32, IndexVal, Two);
4114-
4115-
SDValue TableValue =
4116-
DAG.getNode(ISD::SRL, SL, MVT::i64, BitTable, RoundModeTimesNumBits);
4117-
SDValue TruncTable = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, TableValue);
4118-
4119-
// No need to mask out the high bits since the setreg will ignore them
4120-
// anyway.
4121-
NewMode = TruncTable;
4122-
}
4123-
4124-
// Insert a readfirstlane in case the value is a VGPR. We could do this
4125-
// earlier and keep more operations scalar, but that interferes with
4126-
// combining the source.
4127-
SDValue ReadFirstLaneID =
4128-
DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, SL, MVT::i32);
4129-
NewMode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SL, MVT::i32,
4130-
ReadFirstLaneID, NewMode);
4131-
}
4132-
4133-
// N.B. The setreg will be later folded into s_round_mode on supported
4134-
// targets.
4135-
SDValue IntrinID =
4136-
DAG.getTargetConstant(Intrinsic::amdgcn_s_setreg, SL, MVT::i32);
4137-
uint32_t BothRoundHwReg =
4138-
AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4);
4139-
SDValue RoundBothImm = DAG.getTargetConstant(BothRoundHwReg, SL, MVT::i32);
4140-
4141-
SDValue SetReg =
4142-
DAG.getNode(ISD::INTRINSIC_VOID, SL, Op->getVTList(), Op.getOperand(0),
4143-
IntrinID, RoundBothImm, NewMode);
4144-
4145-
return SetReg;
4146-
}
4147-
41484062
SDValue SITargetLowering::lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const {
41494063
if (Op->isDivergent())
41504064
return SDValue();
@@ -5840,8 +5754,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
58405754
return LowerSTACKSAVE(Op, DAG);
58415755
case ISD::GET_ROUNDING:
58425756
return lowerGET_ROUNDING(Op, DAG);
5843-
case ISD::SET_ROUNDING:
5844-
return lowerSET_ROUNDING(Op, DAG);
58455757
case ISD::PREFETCH:
58465758
return lowerPREFETCH(Op, DAG);
58475759
case ISD::FP_EXTEND:

llvm/lib/Target/AMDGPU/SIISelLowering.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
422422
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
423423
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
424424
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
425-
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
426425

427426
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
428427
SDValue lowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;

llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp

Lines changed: 0 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -174,122 +174,3 @@ static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
174174
static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
175175
HWTowardNegative, HWTowardPositive)) ==
176176
TowardNegativeF32_TowardPositiveF64);
177-
178-
// Decode FLT_ROUNDS into the hardware value where the two rounding modes are
179-
// the same and use a standard value
180-
static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
181-
uint32_t FltRoundsVal) {
182-
if (FltRoundsVal > TowardNegative)
183-
FltRoundsVal -= ExtendedFltRoundOffset;
184-
185-
return static_cast<uint64_t>(getModeRegisterRoundMode(HWVal, HWVal))
186-
<< (FltRoundsVal << 2);
187-
}
188-
189-
/// Decode FLT_ROUNDS into the hardware value where the two rounding modes
190-
/// different and use an extended value.
191-
static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
192-
uint32_t HWF64Val,
193-
uint32_t FltRoundsVal) {
194-
if (FltRoundsVal > TowardNegative)
195-
FltRoundsVal -= ExtendedFltRoundOffset;
196-
return static_cast<uint64_t>(getModeRegisterRoundMode(HWF32Val, HWF64Val))
197-
<< (FltRoundsVal << 2);
198-
}
199-
200-
constexpr uint64_t AMDGPU::FltRoundToHWConversionTable =
201-
encodeFltRoundsToHWTableSame(HWTowardZero, TowardZeroF32_TowardZeroF64) |
202-
encodeFltRoundsToHWTableSame(HWNearestTiesToEven,
203-
NearestTiesToEvenF32_NearestTiesToEvenF64) |
204-
encodeFltRoundsToHWTableSame(HWTowardPositive,
205-
TowardPositiveF32_TowardPositiveF64) |
206-
encodeFltRoundsToHWTableSame(HWTowardNegative,
207-
TowardNegativeF32_TowardNegativeF64) |
208-
209-
encodeFltRoundsToHWTable(HWTowardZero, HWNearestTiesToEven,
210-
TowardZeroF32_NearestTiesToEvenF64) |
211-
encodeFltRoundsToHWTable(HWTowardZero, HWTowardPositive,
212-
TowardZeroF32_TowardPositiveF64) |
213-
encodeFltRoundsToHWTable(HWTowardZero, HWTowardNegative,
214-
TowardZeroF32_TowardNegativeF64) |
215-
216-
encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardZero,
217-
NearestTiesToEvenF32_TowardZeroF64) |
218-
encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardPositive,
219-
NearestTiesToEvenF32_TowardPositiveF64) |
220-
encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardNegative,
221-
NearestTiesToEvenF32_TowardNegativeF64) |
222-
223-
encodeFltRoundsToHWTable(HWTowardPositive, HWTowardZero,
224-
TowardPositiveF32_TowardZeroF64) |
225-
encodeFltRoundsToHWTable(HWTowardPositive, HWNearestTiesToEven,
226-
TowardPositiveF32_NearestTiesToEvenF64) |
227-
encodeFltRoundsToHWTable(HWTowardPositive, HWTowardNegative,
228-
TowardPositiveF32_TowardNegativeF64) |
229-
230-
encodeFltRoundsToHWTable(HWTowardNegative, HWTowardZero,
231-
TowardNegativeF32_TowardZeroF64) |
232-
encodeFltRoundsToHWTable(HWTowardNegative, HWNearestTiesToEven,
233-
TowardNegativeF32_NearestTiesToEvenF64) |
234-
encodeFltRoundsToHWTable(HWTowardNegative, HWTowardPositive,
235-
TowardNegativeF32_TowardPositiveF64);
236-
237-
/// Read the hardware rounding mode equivalent of an AMDGPUFltRounds value.
238-
static constexpr uint32_t
239-
decodeFltRoundToHWConversionTable(uint64_t FltRoundToHWConversionTable,
240-
uint32_t FltRounds) {
241-
uint32_t IndexVal = FltRounds;
242-
if (IndexVal > TowardNegative)
243-
IndexVal -= ExtendedFltRoundOffset;
244-
return (FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
245-
}
246-
247-
uint32_t AMDGPU::decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
248-
return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
249-
FltRounds);
250-
}
251-
252-
static constexpr uint32_t decodeFltRoundToHW(uint32_t FltRounds) {
253-
return ::decodeFltRoundToHWConversionTable(FltRoundToHWConversionTable,
254-
FltRounds);
255-
}
256-
257-
// Verify evaluation of FltRoundToHWConversionTable
258-
259-
static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardZero) ==
260-
getModeRegisterRoundMode(HWTowardZero, HWTowardZero));
261-
static_assert(decodeFltRoundToHW(AMDGPUFltRounds::NearestTiesToEven) ==
262-
getModeRegisterRoundMode(HWNearestTiesToEven,
263-
HWNearestTiesToEven));
264-
static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardPositive) ==
265-
getModeRegisterRoundMode(HWTowardPositive, HWTowardPositive));
266-
static_assert(decodeFltRoundToHW(AMDGPUFltRounds::TowardNegative) ==
267-
getModeRegisterRoundMode(HWTowardNegative, HWTowardNegative));
268-
269-
static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardPositiveF64) ==
270-
getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardPositive));
271-
static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardNegativeF64) ==
272-
getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardNegative));
273-
static_assert(decodeFltRoundToHW(NearestTiesToEvenF32_TowardZeroF64) ==
274-
getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardZero));
275-
276-
static_assert(decodeFltRoundToHW(TowardPositiveF32_NearestTiesToEvenF64) ==
277-
getModeRegisterRoundMode(HWTowardPositive, HWNearestTiesToEven));
278-
static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardNegativeF64) ==
279-
getModeRegisterRoundMode(HWTowardPositive, HWTowardNegative));
280-
static_assert(decodeFltRoundToHW(TowardPositiveF32_TowardZeroF64) ==
281-
getModeRegisterRoundMode(HWTowardPositive, HWTowardZero));
282-
283-
static_assert(decodeFltRoundToHW(TowardNegativeF32_NearestTiesToEvenF64) ==
284-
getModeRegisterRoundMode(HWTowardNegative, HWNearestTiesToEven));
285-
static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardPositiveF64) ==
286-
getModeRegisterRoundMode(HWTowardNegative, HWTowardPositive));
287-
static_assert(decodeFltRoundToHW(TowardNegativeF32_TowardZeroF64) ==
288-
getModeRegisterRoundMode(HWTowardNegative, HWTowardZero));
289-
290-
static_assert(decodeFltRoundToHW(TowardZeroF32_NearestTiesToEvenF64) ==
291-
getModeRegisterRoundMode(HWTowardZero, HWNearestTiesToEven));
292-
static_assert(decodeFltRoundToHW(TowardZeroF32_TowardPositiveF64) ==
293-
getModeRegisterRoundMode(HWTowardZero, HWTowardPositive));
294-
static_assert(decodeFltRoundToHW(TowardZeroF32_TowardNegativeF64) ==
295-
getModeRegisterRoundMode(HWTowardZero, HWTowardNegative));

llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -144,13 +144,6 @@ static constexpr uint32_t F64FltRoundOffset = 2;
144144
// values.
145145
extern const uint64_t FltRoundConversionTable;
146146

147-
// Bit indexed table to convert from FLT_ROUNDS values to hardware rounding mode
148-
// values
149-
extern const uint64_t FltRoundToHWConversionTable;
150-
151-
/// Read the hardware rounding mode equivalent of an AMDGPUFltRounds value.
152-
uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds);
153-
154147
} // end namespace AMDGPU
155148

156149
} // end namespace llvm

0 commit comments

Comments
 (0)