@@ -870,6 +870,8 @@ def S_CBRANCH_I_FORK : SOPK_Pseudo <
870
870
871
871
// This is hasSideEffects to allow its use in readcyclecounter selection.
872
872
// FIXME: Need to truncate immediate to 16-bits.
873
+ // FIXME: Missing mode register use. Should have separate pseudos for
874
+ // the case that may read MODE and the case that only reads MODE.
873
875
def S_GETREG_B32 : SOPK_Pseudo <
874
876
"s_getreg_b32",
875
877
(outs SReg_32:$sdst), (ins hwreg:$simm16),
@@ -1424,6 +1426,66 @@ def : GCNPat <
1424
1426
(S_WAIT_EVENT (i16 0))
1425
1427
>;
1426
1428
1429
+ // The first 10 bits of the mode register are the core FP mode on all
1430
+ // subtargets.
1431
+ //
1432
+ // The high bits include additional fields, intermixed with some
1433
+ // non-floating point environment information. We extract the full
1434
+ // register and clear non-relevant bits.
1435
+ //
1436
+ // EXCP_EN covers floating point exceptions, but also some other
1437
+ // non-FP exceptions.
1438
+ //
1439
+ // Bits 12-18 cover the relevant exception mask on all subtargets.
1440
+ //
1441
+ // FIXME: Bit 18 is int_div0, should this be in the FP environment? I
1442
+ // think the only source is v_rcp_iflag_i32.
1443
+ //
1444
+ // On GFX9+:
1445
+ // Bit 23 is the additional FP16_OVFL mode.
1446
+ //
1447
+ // Bits 19, 20, and 21 cover non-FP exceptions and differ between
1448
+ // gfx9/10/11, so we ignore them here.
1449
+
1450
+ // TODO: Would it be cheaper to emit multiple s_getreg_b32 instructions for
1451
+ // the ranges and combine the results?
1452
+
1453
+ defvar fp_round_mask = !add(!shl(1, 4), -1); // (1 << 4) - 1 = 0xF: bits 0-3
1454
+ defvar fp_denorm_mask = !shl(!add(!shl(1, 4), -1), 4); // 0xF << 4 = 0xF0: bits 4-7
1455
+ defvar dx10_clamp_mask = !shl(1, 8); // bit 8
1456
+ defvar ieee_mode_mask = !shl(1, 9); // bit 9
1457
+
1458
+ // Covers fp_round, fp_denorm, dx10_clamp, and IEEE bit.
1459
+ defvar fpmode_mask =
1460
+ !or(fp_round_mask, fp_denorm_mask, dx10_clamp_mask, ieee_mode_mask); // 0x3FF: bits 0-9
1461
+
1462
+ defvar fp_excp_en_mask = !shl(!add(!shl(1, 7), -1), 12); // 0x7F << 12: bits 12-18
1463
+ defvar fp16_ovfl = !shl(1, 23); // bit 23
1464
+ defvar fpmode_mask_gfx6plus = !or(fpmode_mask, fp_excp_en_mask); // bits 0-9 and 12-18
1465
+ defvar fpmode_mask_gfx9plus = !or(fpmode_mask_gfx6plus, fp16_ovfl); // adds bit 23 to the gfx6plus mask
1466
+
1467
+ class GetFPModePat<int fpmode_mask> : GCNPat< // Selects get_fpmode as a masked read of HWREG.MODE; fpmode_mask picks the bits kept.
1468
+ (i32 get_fpmode), // Source pattern: the i32 get_fpmode node.
1469
+ (S_AND_B32 (i32 fpmode_mask), // Result: AND the value read below with fpmode_mask.
1470
+ (S_GETREG_B32 getHwRegImm< // NOTE(review): arguments presumably are <register, bit offset, bit width> - confirm against getHwRegImm.
1471
+ HWREG.MODE, 0,
1472
+ !add(!logtwo(fpmode_mask), 1)>.ret)) // Index of the highest set bit in fpmode_mask, plus one.
1473
+ >;
1474
+
1475
+ // TODO: Might be worth moving to custom lowering so the AND is
1476
+ // exposed to demanded bits optimizations. Most users probably only
1477
+ // care about the rounding or denorm mode bits. We also can reduce the
1478
+ // demanded read from the getreg immediate.
1479
+ let SubtargetPredicate = isGFX9Plus in {
1480
+ // Last bit = FP16_OVFL
1481
+ def : GetFPModePat<fpmode_mask_gfx9plus>; // Highest set bit of this mask is bit 23.
1482
+ }
1483
+
1484
+ // Last bit = EXCP_EN.int_div0
1485
+ let SubtargetPredicate = isNotGFX9Plus in {
1486
+ def : GetFPModePat<fpmode_mask_gfx6plus>; // Highest set bit of this mask is bit 18.
1487
+ }
1488
+
1427
1489
//===----------------------------------------------------------------------===//
1428
1490
// SOP2 Patterns
1429
1491
//===----------------------------------------------------------------------===//
0 commit comments