-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NFC][AMDGPU] Add lit tests for FMA combining with freeze and nnan variants #142628
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
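@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: Harrison Hao (harrisonGPU) Changes: After this PR #142345, combining `freeze` on `fmul` (without `nnan`) followed by `fadd` or `fsub` into a single `fma` is supported. This patch adds lit tests to verify the optimization behavior for both nnan and non-nnan variants. Closes: #141622 Full diff: https://github.com/llvm/llvm-project/pull/142628.diff 1 File Affected: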
diff --git a/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
new file mode 100644
index 0000000000000..dbf5636ae03ed
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-freeze-fmul-to-fma.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck %s
+
+define float @fma_from_freeze_mul_add_left(float %x, float %y) { ; fmul (no nnan) -> freeze -> fadd 1.0; the expected gfx1100 output is a single v_fma_f32
+; CHECK-LABEL: fma_from_freeze_mul_add_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul ; the freeze sits between the multiply and the add
+ %add = fadd reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the left operand
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_left_with_nnan(float %x, float %y) { ; nnan variant of the add-left case; the expected output keeps separate v_mul_f32 and v_add_f32 (no FMA is formed)
+; CHECK-LABEL: fma_from_freeze_mul_add_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; nnan is present on the multiply
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ; nnan is present on the add; frozen product is the left operand
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right(float %x, float %y) { ; fmul (no nnan) -> freeze -> fadd with the product on the right; the expected gfx1100 output is a single v_fma_f32
+; CHECK-LABEL: fma_from_freeze_mul_add_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul ; the freeze sits between the multiply and the add
+ %add = fadd reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the right operand
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_add_right_with_nnan(float %x, float %y) { ; nnan variant of the add-right case; the expected output keeps separate v_mul_f32 and v_add_f32 (no FMA is formed)
+; CHECK-LABEL: fma_from_freeze_mul_add_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; nnan is present on the multiply
+ %mul.fr = freeze float %mul
+ %add = fadd reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ; nnan is present on the add; frozen product is the right operand
+ ret float %add
+}
+
+define float @fma_from_freeze_mul_sub_left(float %x, float %y) { ; fmul (no nnan) -> freeze -> fsub of 1.0 from the product; the expected output is a single v_fma_f32 with a -1.0 addend
+; CHECK-LABEL: fma_from_freeze_mul_sub_left:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, v0, v1, -1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul ; the freeze sits between the multiply and the subtract
+ %sub = fsub reassoc nsz arcp contract afn float %mul.fr, 1.000000e+00 ; frozen product is the minuend (left operand)
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_left_with_nnan(float %x, float %y) { ; nnan variant of the sub-left case; the expected output is v_mul_f32 followed by v_add_f32 with -1.0 (no FMA is formed)
+; CHECK-LABEL: fma_from_freeze_mul_sub_left_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_add_f32_e32 v0, -1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; nnan is present on the multiply
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float %mul.fr, 1.000000e+00 ; nnan is present on the subtract; frozen product is the minuend
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right(float %x, float %y) { ; fmul (no nnan) -> freeze -> fsub of the product from 1.0; the expected output is a single v_fma_f32 with a negated multiplicand
+; CHECK-LABEL: fma_from_freeze_mul_sub_right:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_fma_f32 v0, -v0, v1, 1.0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nsz arcp contract afn float %x, %y
+ %mul.fr = freeze float %mul ; the freeze sits between the multiply and the subtract
+ %sub = fsub reassoc nsz arcp contract afn float 1.000000e+00, %mul.fr ; frozen product is the subtrahend (right operand)
+ ret float %sub
+}
+
+define float @fma_from_freeze_mul_sub_right_with_nnan(float %x, float %y) { ; nnan variant of the sub-right case; the expected output keeps separate v_mul_f32 and v_sub_f32 (no FMA is formed)
+; CHECK-LABEL: fma_from_freeze_mul_sub_right_with_nnan:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %mul = fmul reassoc nnan nsz arcp contract afn float %x, %y ; nnan is present on the multiply
+ %mul.fr = freeze float %mul
+ %sub = fsub reassoc nnan nsz arcp contract afn float 1.000000e+00, %mul.fr ; nnan is present on the subtract; frozen product is the subtrahend
+ ret float %sub
+}
|
Thanks - but making this work in the presence of ninf and nnan is pretty important too. |
I will implement it in this PR #142250 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
c2626dc
to
9e6d5f8
Compare
[NFC][AMDGPU] Add lit tests for FMA combining with freeze and nnan variants (llvm#142628) After this PR #142345, combining `freeze` on `fmul` (without `nnan`) followed by `fadd` or `fsub` into a single `fma` is supported. This patch adds lit tests to verify the optimization behavior for both nnan and non-nnan variants.
[NFC][AMDGPU] Add lit tests for FMA combining with freeze and nnan variants (llvm#142628) After this PR #142345, combining `freeze` on `fmul` (without `nnan`) followed by `fadd` or `fsub` into a single `fma` is supported. This patch adds lit tests to verify the optimization behavior for both nnan and non-nnan variants.
After this PR #142345, combining `freeze` on `fmul` (without `nnan`) followed by `fadd` or `fsub` into a single `fma` is supported. This patch adds lit tests to verify the optimization behavior for both nnan and non-nnan variants.