@@ -174,3 +174,116 @@ static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
 static_assert(decodeIndexFltRoundConversionTable(getModeRegisterRoundMode(
                   HWTowardNegative, HWTowardPositive)) ==
               TowardNegativeF32_TowardPositiveF64);
+
+// Decode FLT_ROUNDS into the hardware value where the two rounding modes are
+// the same and use a standard value
+static constexpr uint64_t encodeFltRoundsToHWTableSame(uint32_t HWVal,
+                                                       uint32_t FltRoundsVal) {
+  if (FltRoundsVal > TowardNegative)
+    FltRoundsVal -= ExtendedFltRoundOffset;
+
+  return static_cast<uint64_t>(getModeRegisterRoundMode(HWVal, HWVal))
+         << (FltRoundsVal << 2);
+}
+
+/// Decode FLT_ROUNDS into the hardware value where the two rounding modes
+/// are different and use an extended value.
+static constexpr uint64_t encodeFltRoundsToHWTable(uint32_t HWF32Val,
+                                                   uint32_t HWF64Val,
+                                                   uint32_t FltRoundsVal) {
+  if (FltRoundsVal > TowardNegative)
+    FltRoundsVal -= ExtendedFltRoundOffset;
+  return static_cast<uint64_t>(getModeRegisterRoundMode(HWF32Val, HWF64Val))
+         << (FltRoundsVal << 2);
+}
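For readers skimming the diff, a sketch of the packing these two helpers perform: each FLT_ROUNDS value owns one 4-bit slot in the 64-bit table (hence the `FltRoundsVal << 2` bit offset), and the slot holds the combined MODE-register rounding field produced by getModeRegisterRoundMode. The constants and the field layout in the snippet below are illustrative assumptions, not taken from this patch.

#include <cstdint>

// Hypothetical stand-ins for the hardware rounding-mode encodings; the real
// values are defined elsewhere in AMDGPUBaseInfo and may differ.
constexpr uint32_t kHWNearestTiesToEven = 0;
constexpr uint32_t kHWTowardPositive = 1;

// Assumed MODE-register layout: fp32 round mode in bits [1:0], fp64/fp16
// round mode in bits [3:2], mirroring what getModeRegisterRoundMode is
// expected to produce.
constexpr uint32_t packModeField(uint32_t F32, uint32_t F64) {
  return F32 | (F64 << 2);
}

// Place one 4-bit entry at bit offset 4 * FltRoundsVal, the same shift used
// by the encode helpers above.
constexpr uint64_t packEntry(uint32_t F32, uint32_t F64, uint32_t FltRoundsVal) {
  return static_cast<uint64_t>(packModeField(F32, F64)) << (FltRoundsVal << 2);
}

// Entry 3 ends up as the nibble 0b0100 shifted to bits [15:12].
static_assert(packEntry(kHWNearestTiesToEven, kHWTowardPositive, 3) ==
              (uint64_t{0b0100} << 12));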
+
+constexpr uint64_t AMDGPU::FltRoundToHWConversionTable =
+    encodeFltRoundsToHWTableSame(HWTowardZero, TowardZeroF32_TowardZeroF64) |
+    encodeFltRoundsToHWTableSame(HWNearestTiesToEven,
+                                 NearestTiesToEvenF32_NearestTiesToEvenF64) |
+    encodeFltRoundsToHWTableSame(HWTowardPositive,
+                                 TowardPositiveF32_TowardPositiveF64) |
+    encodeFltRoundsToHWTableSame(HWTowardNegative,
+                                 TowardNegativeF32_TowardNegativeF64) |
+
+    encodeFltRoundsToHWTable(HWTowardZero, HWNearestTiesToEven,
+                             TowardZeroF32_NearestTiesToEvenF64) |
+    encodeFltRoundsToHWTable(HWTowardZero, HWTowardPositive,
+                             TowardZeroF32_TowardPositiveF64) |
+    encodeFltRoundsToHWTable(HWTowardZero, HWTowardNegative,
+                             TowardZeroF32_TowardNegativeF64) |
+
+    encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardZero,
+                             NearestTiesToEvenF32_TowardZeroF64) |
+    encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardPositive,
+                             NearestTiesToEvenF32_TowardPositiveF64) |
+    encodeFltRoundsToHWTable(HWNearestTiesToEven, HWTowardNegative,
+                             NearestTiesToEvenF32_TowardNegativeF64) |
+
+    encodeFltRoundsToHWTable(HWTowardPositive, HWTowardZero,
+                             TowardPositiveF32_TowardZeroF64) |
+    encodeFltRoundsToHWTable(HWTowardPositive, HWNearestTiesToEven,
+                             TowardPositiveF32_NearestTiesToEvenF64) |
+    encodeFltRoundsToHWTable(HWTowardPositive, HWTowardNegative,
+                             TowardPositiveF32_TowardNegativeF64) |
+
+    encodeFltRoundsToHWTable(HWTowardNegative, HWTowardZero,
+                             TowardNegativeF32_TowardZeroF64) |
+    encodeFltRoundsToHWTable(HWTowardNegative, HWNearestTiesToEven,
+                             TowardNegativeF32_NearestTiesToEvenF64) |
+    encodeFltRoundsToHWTable(HWTowardNegative, HWTowardPositive,
+                             TowardNegativeF32_TowardPositiveF64);
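The static_asserts below rely on decodeFltRoundToHWConversionTable, which is not part of this hunk. It presumably mirrors decodeIndexFltRoundConversionTable and simply pulls the selected 4-bit entry back out of the table; a minimal sketch under that assumption (the exact signature and parameter type are guesses):

// Assumed shape of the decode helper the asserts below refer to; the real
// declaration lives elsewhere in AMDGPUBaseInfo and may differ.
static constexpr uint32_t decodeFltRoundToHWConversionTable(uint32_t FltRounds) {
  uint32_t IndexVal = FltRounds;
  // Fold the extended FLT_ROUNDS values into the same 0..15 index space used
  // by the encode helpers above.
  if (IndexVal > TowardNegative)
    IndexVal -= ExtendedFltRoundOffset;
  // Each entry is the 4-bit MODE-register rounding field at offset 4 * index.
  return (AMDGPU::FltRoundToHWConversionTable >> (IndexVal << 2)) & 0xf;
}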
+
+// Verify evaluation of FltRoundToHWConversionTable
+
+static_assert(decodeFltRoundToHWConversionTable(AMDGPUFltRounds::TowardZero) ==
+              getModeRegisterRoundMode(HWTowardZero, HWTowardZero));
+static_assert(
+    decodeFltRoundToHWConversionTable(AMDGPUFltRounds::NearestTiesToEven) ==
+    getModeRegisterRoundMode(HWNearestTiesToEven, HWNearestTiesToEven));
+static_assert(
+    decodeFltRoundToHWConversionTable(AMDGPUFltRounds::TowardPositive) ==
+    getModeRegisterRoundMode(HWTowardPositive, HWTowardPositive));
+static_assert(
+    decodeFltRoundToHWConversionTable(AMDGPUFltRounds::TowardNegative) ==
+    getModeRegisterRoundMode(HWTowardNegative, HWTowardNegative));
+
+static_assert(
+    decodeFltRoundToHWConversionTable(NearestTiesToEvenF32_TowardPositiveF64) ==
+    getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardPositive));
+static_assert(
+    decodeFltRoundToHWConversionTable(NearestTiesToEvenF32_TowardNegativeF64) ==
+    getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardNegative));
+static_assert(
+    decodeFltRoundToHWConversionTable(NearestTiesToEvenF32_TowardZeroF64) ==
+    getModeRegisterRoundMode(HWNearestTiesToEven, HWTowardZero));
+
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardPositiveF32_NearestTiesToEvenF64) ==
+    getModeRegisterRoundMode(HWTowardPositive, HWNearestTiesToEven));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardPositiveF32_TowardNegativeF64) ==
+    getModeRegisterRoundMode(HWTowardPositive, HWTowardNegative));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardPositiveF32_TowardZeroF64) ==
+    getModeRegisterRoundMode(HWTowardPositive, HWTowardZero));
+
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardNegativeF32_NearestTiesToEvenF64) ==
+    getModeRegisterRoundMode(HWTowardNegative, HWNearestTiesToEven));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardNegativeF32_TowardPositiveF64) ==
+    getModeRegisterRoundMode(HWTowardNegative, HWTowardPositive));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardNegativeF32_TowardZeroF64) ==
+    getModeRegisterRoundMode(HWTowardNegative, HWTowardZero));
+
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardZeroF32_NearestTiesToEvenF64) ==
+    getModeRegisterRoundMode(HWTowardZero, HWNearestTiesToEven));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardZeroF32_TowardPositiveF64) ==
+    getModeRegisterRoundMode(HWTowardZero, HWTowardPositive));
+static_assert(
+    decodeFltRoundToHWConversionTable(TowardZeroF32_TowardNegativeF64) ==
+    getModeRegisterRoundMode(HWTowardZero, HWTowardNegative));
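Taken together with the context lines at the top of the hunk, these asserts also pin down a round-trip property: converting a FLT_ROUNDS value to the 4-bit hardware field and then indexing the earlier conversion table with that field returns the original value. A hedged illustration for one value that both directions cover above, written against the decode helper sketched earlier (not part of the patch):

// Round-trip for one FLT_ROUNDS value covered by both tables above:
// FLT_ROUNDS -> hardware field -> FLT_ROUNDS is the identity.
static_assert(decodeIndexFltRoundConversionTable(decodeFltRoundToHWConversionTable(
                  TowardNegativeF32_TowardPositiveF64)) ==
              TowardNegativeF32_TowardPositiveF64);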