Skip to content

Commit 030d075

Browse files
authored
[AMDGPU] Fix bf16 inv2pi inline constant handling (#82283)
The inline constant 1/(2*pi) has the truncated value 0x3e22; according to the spec it is not rounded. A bf16 value is, in a nutshell, an fp32 value with the low 16 bits of the mantissa cleared. The value 0x3e22 converted to fp32 is 0.158203125, and the next representable value, 0x3e23, is 0.1591796875. The fp32 value of 1/(2*pi) = 0.15915494 cannot be represented exactly in bf16. However, since bf16 values are essentially truncated fp32 values, we can use 0.15915494 as an idiomatic representation of the 1/(2*pi) inline constant. This is also consistent with sp3 behaviour. The patch fixes the problem that the value we print for the inv2pi inline constant is not parsed back as inv2pi by the asm parser and instead gets rounded.
1 parent 13dce35 commit 030d075

File tree

2 files changed

+26
-8
lines changed

2 files changed

+26
-8
lines changed

llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2230,6 +2230,24 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22302230
// in predicate methods (isLiteralImm())
22312231
llvm_unreachable("fp literal in 64-bit integer instruction.");
22322232

2233+
case AMDGPU::OPERAND_REG_IMM_BF16:
2234+
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
2235+
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
2236+
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
2237+
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
2238+
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
2239+
case AMDGPU::OPERAND_REG_IMM_V2BF16:
2240+
if (AsmParser->hasInv2PiInlineImm() && Literal == 0x3fc45f306725feed) {
2241+
// This is the 1/(2*pi) which is going to be truncated to bf16 with the
2242+
// loss of precision. The constant represents idiomatic fp32 value of
2243+
// 1/(2*pi) = 0.15915494 since bf16 is in fact fp32 with cleared low 16
2244+
// bits. Prevent rounding below.
2245+
Inst.addOperand(MCOperand::createImm(0x3e22));
2246+
setImmKindLiteral();
2247+
return;
2248+
}
2249+
[[fallthrough]];
2250+
22332251
case AMDGPU::OPERAND_REG_IMM_INT32:
22342252
case AMDGPU::OPERAND_REG_IMM_FP32:
22352253
case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
@@ -2238,24 +2256,17 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
22382256
case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
22392257
case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
22402258
case AMDGPU::OPERAND_REG_IMM_INT16:
2241-
case AMDGPU::OPERAND_REG_IMM_BF16:
22422259
case AMDGPU::OPERAND_REG_IMM_FP16:
2243-
case AMDGPU::OPERAND_REG_IMM_BF16_DEFERRED:
22442260
case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
22452261
case AMDGPU::OPERAND_REG_INLINE_C_INT16:
2246-
case AMDGPU::OPERAND_REG_INLINE_C_BF16:
22472262
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
22482263
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
2249-
case AMDGPU::OPERAND_REG_INLINE_C_V2BF16:
22502264
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
22512265
case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
2252-
case AMDGPU::OPERAND_REG_INLINE_AC_BF16:
22532266
case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
22542267
case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
2255-
case AMDGPU::OPERAND_REG_INLINE_AC_V2BF16:
22562268
case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
22572269
case AMDGPU::OPERAND_REG_IMM_V2INT16:
2258-
case AMDGPU::OPERAND_REG_IMM_V2BF16:
22592270
case AMDGPU::OPERAND_REG_IMM_V2FP16:
22602271
case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
22612272
case AMDGPU::OPERAND_REG_IMM_V2FP32:

llvm/test/MC/AMDGPU/bf16_imm.s

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,17 @@ v_dot2_bf16_bf16 v2, v0, 4.0, v2
3434
v_dot2_bf16_bf16 v2, v0, -4.0, v2
3535
// CHECK: v_dot2_bf16_bf16 v2, v0, -4.0, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xef,0x09,0x04]
3636

37-
// FIXME: pi/2 rounded value is incorrect in the inst printer.
37+
// Check 1/(2*pi) rounded value and idiomatic fp32 0.15915494 value
38+
// which cannot be accurately represented in bf16.
3839

3940
v_dot2_bf16_bf16 v2, v0, 0.158203125, v2
4041
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
4142

43+
v_dot2_bf16_bf16 v2, v0, 0.15915494, v2
44+
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
45+
4246
v_dot2_bf16_bf16 v2, v0, 0x3e22, v2
4347
// CHECK: v_dot2_bf16_bf16 v2, v0, 0.15915494, v2 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0xf1,0x09,0x04]
48+
49+
v_dot2_bf16_bf16 v2, v0, v2, 0.15915494
50+
// CHECK: v_dot2_bf16_bf16 v2, v0, v2, 0.15915494 ; encoding: [0x02,0x00,0x67,0xd6,0x00,0x05,0xe2,0x03]

0 commit comments

Comments
 (0)