Skip to content

Commit 6eab9dd

Browse files
authored
[NVPTX] remove incorrect NVPTX intrinsic transformations (#76870)
`nvvm_fabs_f`, `nvvm_fabs_ftz_f`: Unfortunately, llvm fabs is not equivalent to these intrinsics, since llvm fabs is defined to only set the sign bit to zero, while these can also flush subnormal inputs and modify NaNs. `nvvm_round_d`, `nvvm_round_f`, `nvvm_round_ftz_f`: llvm.nvvm.round uses RNI, while llvm.round codegens to RZI. LLVM defines llvm.round to use the same rounding as libm `round[f]()`, which is not necessarily the same as how we define llvm.nvvm.round. `nvvm_sqrt_rn_f`, `nvvm_sqrt_rn_ftz_f`: sqrt may be lowered to a less precise version of sqrt, such as sqrt.approx in NVPTX, depending on factors such as the value of -nvptx-prec-sqrtf32. These intrinsics should always become the corresponding NVPTX instructions. `nvvm_add_rn_d`, `nvvm_add_rn_f`, `nvvm_add_rn_ftz_f`, `nvvm_mul_rn_d`, `nvvm_mul_rn_f`, `nvvm_mul_rn_ftz_f`: These nvvm intrinsics have an explicitly specified rounding mode (.rn). They should always be lowered to a PTX instruction with the same explicit rounding mode. Converting them to fmul and fadd instructions results in PTX instructions without a rounding mode specified. This can cause issues because: > An add [or mul] instruction with no rounding modifier defaults to round-to-nearest-even and may be optimized aggressively by the code optimizer. In particular, mul/add sequences with no rounding modifiers may be optimized to use fused-multiply-add instructions on the target device. `nvvm_div_rn_f`, `nvvm_div_rn_ftz_f`, `nvvm_rcp_rn_f`, `nvvm_rcp_rn_ftz_f`: fdiv may be lowered to a less precise version of div, such as div.full in NVPTX, depending on factors such as the value of -nvptx-prec-divf32. These intrinsics should always become the corresponding NVPTX instructions.
1 parent 6e90f13 commit 6eab9dd

File tree

2 files changed

+17
-65
lines changed

2 files changed

+17
-65
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -180,10 +180,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
180180
return {Intrinsic::ceil, FTZ_MustBeOn};
181181
case Intrinsic::nvvm_fabs_d:
182182
return {Intrinsic::fabs, FTZ_Any};
183-
case Intrinsic::nvvm_fabs_f:
184-
return {Intrinsic::fabs, FTZ_MustBeOff};
185-
case Intrinsic::nvvm_fabs_ftz_f:
186-
return {Intrinsic::fabs, FTZ_MustBeOn};
187183
case Intrinsic::nvvm_floor_d:
188184
return {Intrinsic::floor, FTZ_Any};
189185
case Intrinsic::nvvm_floor_f:
@@ -264,12 +260,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
264260
return {Intrinsic::minimum, FTZ_MustBeOff, true};
265261
case Intrinsic::nvvm_fmin_ftz_nan_f16x2:
266262
return {Intrinsic::minimum, FTZ_MustBeOn, true};
267-
case Intrinsic::nvvm_round_d:
268-
return {Intrinsic::round, FTZ_Any};
269-
case Intrinsic::nvvm_round_f:
270-
return {Intrinsic::round, FTZ_MustBeOff};
271-
case Intrinsic::nvvm_round_ftz_f:
272-
return {Intrinsic::round, FTZ_MustBeOn};
273263
case Intrinsic::nvvm_sqrt_rn_d:
274264
return {Intrinsic::sqrt, FTZ_Any};
275265
case Intrinsic::nvvm_sqrt_f:
@@ -278,10 +268,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
278268
// the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
279269
// the versions with explicit ftz-ness.
280270
return {Intrinsic::sqrt, FTZ_Any};
281-
case Intrinsic::nvvm_sqrt_rn_f:
282-
return {Intrinsic::sqrt, FTZ_MustBeOff};
283-
case Intrinsic::nvvm_sqrt_rn_ftz_f:
284-
return {Intrinsic::sqrt, FTZ_MustBeOn};
285271
case Intrinsic::nvvm_trunc_d:
286272
return {Intrinsic::trunc, FTZ_Any};
287273
case Intrinsic::nvvm_trunc_f:
@@ -316,24 +302,8 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
316302
return {Instruction::UIToFP};
317303

318304
// NVVM intrinsics that map to LLVM binary ops.
319-
case Intrinsic::nvvm_add_rn_d:
320-
return {Instruction::FAdd, FTZ_Any};
321-
case Intrinsic::nvvm_add_rn_f:
322-
return {Instruction::FAdd, FTZ_MustBeOff};
323-
case Intrinsic::nvvm_add_rn_ftz_f:
324-
return {Instruction::FAdd, FTZ_MustBeOn};
325-
case Intrinsic::nvvm_mul_rn_d:
326-
return {Instruction::FMul, FTZ_Any};
327-
case Intrinsic::nvvm_mul_rn_f:
328-
return {Instruction::FMul, FTZ_MustBeOff};
329-
case Intrinsic::nvvm_mul_rn_ftz_f:
330-
return {Instruction::FMul, FTZ_MustBeOn};
331305
case Intrinsic::nvvm_div_rn_d:
332306
return {Instruction::FDiv, FTZ_Any};
333-
case Intrinsic::nvvm_div_rn_f:
334-
return {Instruction::FDiv, FTZ_MustBeOff};
335-
case Intrinsic::nvvm_div_rn_ftz_f:
336-
return {Instruction::FDiv, FTZ_MustBeOn};
337307

338308
// The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
339309
// need special handling.
@@ -342,10 +312,6 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
342312
// as well.
343313
case Intrinsic::nvvm_rcp_rn_d:
344314
return {SPC_Reciprocal, FTZ_Any};
345-
case Intrinsic::nvvm_rcp_rn_f:
346-
return {SPC_Reciprocal, FTZ_MustBeOff};
347-
case Intrinsic::nvvm_rcp_rn_ftz_f:
348-
return {SPC_Reciprocal, FTZ_MustBeOn};
349315

350316
// We do not currently simplify intrinsics that give an approximate
351317
// answer. These include:

llvm/test/Transforms/InstCombine/NVPTX/nvvm-intrins.ll

Lines changed: 17 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,13 @@ define double @fabs_double(double %a) #0 {
4949
}
5050
; CHECK-LABEL: @fabs_float
5151
define float @fabs_float(float %a) #0 {
52-
; NOFTZ: call float @llvm.fabs.f32
53-
; FTZ: call float @llvm.nvvm.fabs.f
52+
; CHECK: call float @llvm.nvvm.fabs.f
5453
%ret = call float @llvm.nvvm.fabs.f(float %a)
5554
ret float %ret
5655
}
5756
; CHECK-LABEL: @fabs_float_ftz
5857
define float @fabs_float_ftz(float %a) #0 {
59-
; NOFTZ: call float @llvm.nvvm.fabs.ftz.f
60-
; FTZ: call float @llvm.fabs.f32
58+
; CHECK: call float @llvm.nvvm.fabs.ftz.f
6159
%ret = call float @llvm.nvvm.fabs.ftz.f(float %a)
6260
ret float %ret
6361
}
@@ -148,21 +146,19 @@ define float @fmin_float_ftz(float %a, float %b) #0 {
148146

149147
; CHECK-LABEL: @round_double
150148
define double @round_double(double %a) #0 {
151-
; CHECK: call double @llvm.round.f64
149+
; CHECK: call double @llvm.nvvm.round.d
152150
%ret = call double @llvm.nvvm.round.d(double %a)
153151
ret double %ret
154152
}
155153
; CHECK-LABEL: @round_float
156154
define float @round_float(float %a) #0 {
157-
; NOFTZ: call float @llvm.round.f32
158-
; FTZ: call float @llvm.nvvm.round.f
155+
; CHECK: call float @llvm.nvvm.round.f
159156
%ret = call float @llvm.nvvm.round.f(float %a)
160157
ret float %ret
161158
}
162159
; CHECK-LABEL: @round_float_ftz
163160
define float @round_float_ftz(float %a) #0 {
164-
; NOFTZ: call float @llvm.nvvm.round.ftz.f
165-
; FTZ: call float @llvm.round.f32
161+
; CHECK: call float @llvm.nvvm.round.ftz.f
166162
%ret = call float @llvm.nvvm.round.ftz.f(float %a)
167163
ret float %ret
168164
}
@@ -292,42 +288,38 @@ define float @test_ull2f(i64 %a) #0 {
292288

293289
; CHECK-LABEL: @test_add_rn_d
294290
define double @test_add_rn_d(double %a, double %b) #0 {
295-
; CHECK: fadd
291+
; CHECK: call double @llvm.nvvm.add.rn.d
296292
%ret = call double @llvm.nvvm.add.rn.d(double %a, double %b)
297293
ret double %ret
298294
}
299295
; CHECK-LABEL: @test_add_rn_f
300296
define float @test_add_rn_f(float %a, float %b) #0 {
301-
; NOFTZ: fadd
302-
; FTZ: call float @llvm.nvvm.add.rn.f
297+
; CHECK: call float @llvm.nvvm.add.rn.f
303298
%ret = call float @llvm.nvvm.add.rn.f(float %a, float %b)
304299
ret float %ret
305300
}
306301
; CHECK-LABEL: @test_add_rn_f_ftz
307302
define float @test_add_rn_f_ftz(float %a, float %b) #0 {
308-
; NOFTZ: call float @llvm.nvvm.add.rn.f
309-
; FTZ: fadd
303+
; CHECK: call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
310304
%ret = call float @llvm.nvvm.add.rn.ftz.f(float %a, float %b)
311305
ret float %ret
312306
}
313307

314308
; CHECK-LABEL: @test_mul_rn_d
315309
define double @test_mul_rn_d(double %a, double %b) #0 {
316-
; CHECK: fmul
310+
; CHECK: call double @llvm.nvvm.mul.rn.d
317311
%ret = call double @llvm.nvvm.mul.rn.d(double %a, double %b)
318312
ret double %ret
319313
}
320314
; CHECK-LABEL: @test_mul_rn_f
321315
define float @test_mul_rn_f(float %a, float %b) #0 {
322-
; NOFTZ: fmul
323-
; FTZ: call float @llvm.nvvm.mul.rn.f
316+
; CHECK: call float @llvm.nvvm.mul.rn.f
324317
%ret = call float @llvm.nvvm.mul.rn.f(float %a, float %b)
325318
ret float %ret
326319
}
327320
; CHECK-LABEL: @test_mul_rn_f_ftz
328321
define float @test_mul_rn_f_ftz(float %a, float %b) #0 {
329-
; NOFTZ: call float @llvm.nvvm.mul.rn.f
330-
; FTZ: fmul
322+
; CHECK: call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
331323
%ret = call float @llvm.nvvm.mul.rn.ftz.f(float %a, float %b)
332324
ret float %ret
333325
}
@@ -340,15 +332,13 @@ define double @test_div_rn_d(double %a, double %b) #0 {
340332
}
341333
; CHECK-LABEL: @test_div_rn_f
342334
define float @test_div_rn_f(float %a, float %b) #0 {
343-
; NOFTZ: fdiv
344-
; FTZ: call float @llvm.nvvm.div.rn.f
335+
; CHECK: call float @llvm.nvvm.div.rn.f
345336
%ret = call float @llvm.nvvm.div.rn.f(float %a, float %b)
346337
ret float %ret
347338
}
348339
; CHECK-LABEL: @test_div_rn_f_ftz
349340
define float @test_div_rn_f_ftz(float %a, float %b) #0 {
350-
; NOFTZ: call float @llvm.nvvm.div.rn.f
351-
; FTZ: fdiv
341+
; CHECK: call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
352342
%ret = call float @llvm.nvvm.div.rn.ftz.f(float %a, float %b)
353343
ret float %ret
354344
}
@@ -357,15 +347,13 @@ define float @test_div_rn_f_ftz(float %a, float %b) #0 {
357347

358348
; CHECK-LABEL: @test_rcp_rn_f
359349
define float @test_rcp_rn_f(float %a) #0 {
360-
; NOFTZ: fdiv float 1.0{{.*}} %a
361-
; FTZ: call float @llvm.nvvm.rcp.rn.f
350+
; CHECK: call float @llvm.nvvm.rcp.rn.f
362351
%ret = call float @llvm.nvvm.rcp.rn.f(float %a)
363352
ret float %ret
364353
}
365354
; CHECK-LABEL: @test_rcp_rn_f_ftz
366355
define float @test_rcp_rn_f_ftz(float %a) #0 {
367-
; NOFTZ: call float @llvm.nvvm.rcp.rn.f
368-
; FTZ: fdiv float 1.0{{.*}} %a
356+
; CHECK: call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
369357
%ret = call float @llvm.nvvm.rcp.rn.ftz.f(float %a)
370358
ret float %ret
371359
}
@@ -385,15 +373,13 @@ define float @test_sqrt_f(float %a) #0 {
385373
}
386374
; CHECK-LABEL: @test_sqrt_rn_f
387375
define float @test_sqrt_rn_f(float %a) #0 {
388-
; NOFTZ: call float @llvm.sqrt.f32(float %a)
389-
; FTZ: call float @llvm.nvvm.sqrt.rn.f
376+
; CHECK: call float @llvm.nvvm.sqrt.rn.f
390377
%ret = call float @llvm.nvvm.sqrt.rn.f(float %a)
391378
ret float %ret
392379
}
393380
; CHECK-LABEL: @test_sqrt_rn_f_ftz
394381
define float @test_sqrt_rn_f_ftz(float %a) #0 {
395-
; NOFTZ: call float @llvm.nvvm.sqrt.rn.f
396-
; FTZ: call float @llvm.sqrt.f32(float %a)
382+
; CHECK: call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
397383
%ret = call float @llvm.nvvm.sqrt.rn.ftz.f(float %a)
398384
ret float %ret
399385
}

0 commit comments

Comments
 (0)