@@ -174,3 +174,122 @@ static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
174
174
static_assert (decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
175
175
HWTowardNegative, HWTowardPositive)) ==
176
176
TowardNegativeF32_TowardPositiveF64);
177
+
178
+ // Decode FLT_ROUNDS into the hardware value where the two rounding modes are
179
+ // the same and use a standard value
180
+ static constexpr uint64_t encodeFltRoundsToHWTableSame (uint32_t HWVal,
181
+ uint32_t FltRoundsVal) {
182
+ if (FltRoundsVal > TowardNegative)
183
+ FltRoundsVal -= ExtendedFltRoundOffset;
184
+
185
+ return static_cast <uint64_t >(getModeRegisterRoundMode (HWVal, HWVal))
186
+ << (FltRoundsVal << 2 );
187
+ }
188
+
189
+ // / Decode FLT_ROUNDS into the hardware value where the two rounding modes
190
+ // / different and use an extended value.
191
+ static constexpr uint64_t encodeFltRoundsToHWTable (uint32_t HWF32Val,
192
+ uint32_t HWF64Val,
193
+ uint32_t FltRoundsVal) {
194
+ if (FltRoundsVal > TowardNegative)
195
+ FltRoundsVal -= ExtendedFltRoundOffset;
196
+ return static_cast <uint64_t >(getModeRegisterRoundMode (HWF32Val, HWF64Val))
197
+ << (FltRoundsVal << 2 );
198
+ }
199
+
200
+ const uint64_t AMDGPU::FltRoundToHWConversionTable =
201
+ encodeFltRoundsToHWTableSame (HWTowardZero, TowardZeroF32_TowardZeroF64) |
202
+ encodeFltRoundsToHWTableSame(HWNearestTiesToEven,
203
+ NearestTiesToEvenF32_NearestTiesToEvenF64) |
204
+ encodeFltRoundsToHWTableSame(HWTowardPositive,
205
+ TowardPositiveF32_TowardPositiveF64) |
206
+ encodeFltRoundsToHWTableSame(HWTowardNegative,
207
+ TowardNegativeF32_TowardNegativeF64) |
208
+
209
+ encodeFltRoundsToHWTable(HWTowardZero, HWNearestTiesToEven,
210
+ TowardZeroF32_NearestTiesToEvenF64) |
211
+ encodeFltRoundsToHWTable(HWTowardZero, HWTowardPositive,
212
+ TowardZeroF32_TowardPositiveF64) |
213
+ encodeFltRoundsToHWTable(HWTowardZero, HWTowardNegative,
214
+ TowardZeroF32_TowardNegativeF64) |
215
+
216
+ encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardZero,
217
+ NearestTiesToEvenF32_TowardZeroF64) |
218
+ encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardPositive,
219
+ NearestTiesToEvenF32_TowardPositiveF64) |
220
+ encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardNegative,
221
+ NearestTiesToEvenF32_TowardNegativeF64) |
222
+
223
+ encodeFltRoundsToHWTable(HWTowardPositive, HWTowardZero,
224
+ TowardPositiveF32_TowardZeroF64) |
225
+ encodeFltRoundsToHWTable(HWTowardPositive, HWNearestTiesToEven,
226
+ TowardPositiveF32_NearestTiesToEvenF64) |
227
+ encodeFltRoundsToHWTable(HWTowardPositive, HWTowardNegative,
228
+ TowardPositiveF32_TowardNegativeF64) |
229
+
230
+ encodeFltRoundsToHWTable(HWTowardNegative, HWTowardZero,
231
+ TowardNegativeF32_TowardZeroF64) |
232
+ encodeFltRoundsToHWTable(HWTowardNegative, HWNearestTiesToEven,
233
+ TowardNegativeF32_NearestTiesToEvenF64) |
234
+ encodeFltRoundsToHWTable(HWTowardNegative, HWTowardPositive,
235
+ TowardNegativeF32_TowardPositiveF64);
236
+
237
+ // / Read the hardware rounding mode equivalent of a AMDGPUFltRounds value.
238
+ static constexpr uint32_t
239
+ decodeFltRoundToHWConversionTable (uint64_t FltRoundToHWConversionTable,
240
+ uint32_t FltRounds) {
241
+ uint32_t IndexVal = FltRounds;
242
+ if (IndexVal > TowardNegative)
243
+ IndexVal -= ExtendedFltRoundOffset;
244
+ return (FltRoundToHWConversionTable >> (IndexVal << 2 )) & 0xf ;
245
+ }
246
+
247
+ uint32_t AMDGPU::decodeFltRoundToHWConversionTable (uint32_t FltRounds) {
248
+ return ::decodeFltRoundToHWConversionTable (FltRoundToHWConversionTable,
249
+ FltRounds);
250
+ }
251
+
252
+ static constexpr uint32_t decodeFltRoundToHW (uint32_t FltRounds) {
253
+ return ::decodeFltRoundToHWConversionTable (FltRoundToHWConversionTable,
254
+ FltRounds);
255
+ }
256
+
257
+ // Verify evaluation of FltRoundToHWConversionTable
258
+
259
+ static_assert (decodeFltRoundToHW(AMDGPUFltRounds::TowardZero) ==
260
+ getModeRegisterRoundMode(HWTowardZero, HWTowardZero));
261
+ static_assert (decodeFltRoundToHW(AMDGPUFltRounds::NearestTiesToEven) ==
262
+ getModeRegisterRoundMode(HWNearestTiesToEven,
263
+ HWNearestTiesToEven));
264
+ static_assert (decodeFltRoundToHW(AMDGPUFltRounds::TowardPositive) ==
265
+ getModeRegisterRoundMode(HWTowardPositive, HWTowardPositive));
266
+ static_assert (decodeFltRoundToHW(AMDGPUFltRounds::TowardNegative) ==
267
+ getModeRegisterRoundMode(HWTowardNegative, HWTowardNegative));
268
+
269
+ static_assert (decodeFltRoundToHW(NearestTiesToEvenF32_TowardPositiveF64) ==
270
+ getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardPositive));
271
+ static_assert (decodeFltRoundToHW(NearestTiesToEvenF32_TowardNegativeF64) ==
272
+ getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardNegative));
273
+ static_assert (decodeFltRoundToHW(NearestTiesToEvenF32_TowardZeroF64) ==
274
+ getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardZero));
275
+
276
+ static_assert (decodeFltRoundToHW(TowardPositiveF32_NearestTiesToEvenF64) ==
277
+ getModeRegisterRoundMode(HWTowardPositive, HWNearestTiesToEven));
278
+ static_assert (decodeFltRoundToHW(TowardPositiveF32_TowardNegativeF64) ==
279
+ getModeRegisterRoundMode(HWTowardPositive, HWTowardNegative));
280
+ static_assert (decodeFltRoundToHW(TowardPositiveF32_TowardZeroF64) ==
281
+ getModeRegisterRoundMode(HWTowardPositive, HWTowardZero));
282
+
283
+ static_assert (decodeFltRoundToHW(TowardNegativeF32_NearestTiesToEvenF64) ==
284
+ getModeRegisterRoundMode(HWTowardNegative, HWNearestTiesToEven));
285
+ static_assert (decodeFltRoundToHW(TowardNegativeF32_TowardPositiveF64) ==
286
+ getModeRegisterRoundMode(HWTowardNegative, HWTowardPositive));
287
+ static_assert (decodeFltRoundToHW(TowardNegativeF32_TowardZeroF64) ==
288
+ getModeRegisterRoundMode(HWTowardNegative, HWTowardZero));
289
+
290
+ static_assert (decodeFltRoundToHW(TowardZeroF32_NearestTiesToEvenF64) ==
291
+ getModeRegisterRoundMode(HWTowardZero, HWNearestTiesToEven));
292
+ static_assert (decodeFltRoundToHW(TowardZeroF32_TowardPositiveF64) ==
293
+ getModeRegisterRoundMode(HWTowardZero, HWTowardPositive));
294
+ static_assert (decodeFltRoundToHW(TowardZeroF32_TowardNegativeF64) ==
295
+ getModeRegisterRoundMode(HWTowardZero, HWTowardNegative));
0 commit comments