Skip to content

Commit 08e0420

Browse files
authored
[GlobalISel] Commute G_FMUL and G_FADD constant LHS to RHS. (#65298)
1 parent c7cc756 commit 08e0420

File tree

7 files changed

+189
-42
lines changed

7 files changed

+189
-42
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,12 @@ class CombinerHelper {
787787
/// Match constant LHS ops that should be commuted.
788788
bool matchCommuteConstantToRHS(MachineInstr &MI);
789789

790+
/// Match constant LHS FP ops that should be commuted.
791+
bool matchCommuteFPConstantToRHS(MachineInstr &MI);
792+
793+
/// Given a binop \p MI, commute operands 1 and 2.
794+
void applyCommuteBinOpOperands(MachineInstr &MI);
795+
790796
private:
791797
/// Given a non-indexed load or store instruction \p MI, find an offset that
792798
/// can be usefully and legally folded into it as a post-indexing operation.

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -396,20 +396,25 @@ def select_to_logical : GICombineRule<
396396
// Fold (C op x) -> (x op C)
397397
// TODO: handle more isCommutable opcodes
398398
// TODO: handle compares (currently not marked as isCommutable)
399-
def commute_constant_to_rhs : GICombineRule<
399+
def commute_int_constant_to_rhs : GICombineRule<
400400
(defs root:$root),
401401
(match (wip_match_opcode G_ADD, G_MUL, G_AND, G_OR, G_XOR):$root,
402402
[{ return Helper.matchCommuteConstantToRHS(*${root}); }]),
403-
(apply [{
404-
Observer.changingInstr(*${root});
405-
Register LHSReg = ${root}->getOperand(1).getReg();
406-
Register RHSReg = ${root}->getOperand(2).getReg();
407-
${root}->getOperand(1).setReg(RHSReg);
408-
${root}->getOperand(2).setReg(LHSReg);
409-
Observer.changedInstr(*${root});
410-
}])
403+
(apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
404+
>;
405+
406+
// Floating-point variant of the (C op x) -> (x op C) fold: roots on G_FADD and
// G_FMUL, defers the match decision to
// CombinerHelper::matchCommuteFPConstantToRHS, and swaps the two source
// operands via CombinerHelper::applyCommuteBinOpOperands.
def commute_fp_constant_to_rhs : GICombineRule<
407+
(defs root:$root),
408+
(match (wip_match_opcode G_FADD, G_FMUL):$root,
409+
[{ return Helper.matchCommuteFPConstantToRHS(*${root}); }]),
410+
(apply [{ Helper.applyCommuteBinOpOperands(*${root}); }])
411411
>;
412412

413+
// Groups the integer and floating-point constant-commuting rules under the
// single name commute_constant_to_rhs.
def commute_constant_to_rhs : GICombineGroup<[
414+
commute_int_constant_to_rhs,
415+
commute_fp_constant_to_rhs
416+
]>;
417+
413418
// Fold x op 0 -> x
414419
def right_identity_zero_frags : GICombinePatFrag<
415420
(outs root:$dst), (ins $x),

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6026,6 +6026,24 @@ bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) {
60266026
!getIConstantVRegVal(RHS, MRI);
60276027
}
60286028

6029+
/// Decide whether the floating-point binop \p MI should have its two source
/// operands swapped so that a constant ends up on the right-hand side.
///
/// Operand 1 is treated as the LHS and operand 2 as the RHS.
///
/// \returns true only when the LHS matches m_GFCstOrSplat and the RHS does
/// not; false otherwise.
bool CombinerHelper::matchCommuteFPConstantToRHS(MachineInstr &MI) {
6030+
Register LHS = MI.getOperand(1).getReg();
6031+
Register RHS = MI.getOperand(2).getReg();
6032+
std::optional<FPValueAndVReg> ValAndVReg;
6033+
// Nothing to commute unless the LHS is an FP constant (or constant splat).
if (!mi_match(LHS, MRI, m_GFCstOrSplat(ValAndVReg)))
6034+
return false;
6035+
// Reject the case where the RHS is a constant as well; presumably this keeps
// the combine from swapping two constants back and forth -- TODO confirm.
// ValAndVReg is only reused as a scratch capture here; its value is not read.
return !mi_match(RHS, MRI, m_GFCstOrSplat(ValAndVReg));
6036+
}
6037+
6038+
/// Swap the registers held by operands 1 and 2 of \p MI in place.
///
/// The mutation is bracketed by Observer.changingInstr / Observer.changedInstr
/// so the change observer is told about the in-place edit.
void CombinerHelper::applyCommuteBinOpOperands(MachineInstr &MI) {
6039+
Observer.changingInstr(MI);
6040+
// Read both registers before writing either operand, so the swap does not
// lose a value.
Register LHSReg = MI.getOperand(1).getReg();
6041+
Register RHSReg = MI.getOperand(2).getReg();
6042+
MI.getOperand(1).setReg(RHSReg);
6043+
MI.getOperand(2).setReg(LHSReg);
6044+
Observer.changedInstr(MI);
6045+
}
6046+
60296047
bool CombinerHelper::tryCombine(MachineInstr &MI) {
60306048
if (tryCombineCopy(MI))
60316049
return true;
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
---
4+
name: fadd
5+
tracksRegLiveness: true
6+
body: |
7+
bb.1:
8+
liveins: $s0
9+
10+
; CHECK-LABEL: name: fadd
11+
; CHECK: liveins: $s0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
14+
; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 1.000000e+00
15+
; CHECK-NEXT: %add:_(s32) = G_FADD [[COPY]], %cst
16+
; CHECK-NEXT: $s0 = COPY %add(s32)
17+
; CHECK-NEXT: RET_ReallyLR
18+
%0:_(s32) = COPY $s0
19+
%cst:_(s32) = G_FCONSTANT float 1.000000e+00
20+
%add:_(s32) = G_FADD %cst, %0
21+
$s0 = COPY %add
22+
RET_ReallyLR
23+
24+
...
25+
---
26+
name: fmul
27+
tracksRegLiveness: true
28+
body: |
29+
bb.1:
30+
liveins: $s0
31+
32+
; CHECK-LABEL: name: fmul
33+
; CHECK: liveins: $s0
34+
; CHECK-NEXT: {{ $}}
35+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $s0
36+
; CHECK-NEXT: %cst:_(s32) = G_FCONSTANT float 2.000000e+00
37+
; CHECK-NEXT: %mul:_(s32) = G_FMUL [[COPY]], %cst
38+
; CHECK-NEXT: $s0 = COPY %mul(s32)
39+
; CHECK-NEXT: RET_ReallyLR
40+
%0:_(s32) = COPY $s0
41+
%cst:_(s32) = G_FCONSTANT float 2.000000e+00
42+
%mul:_(s32) = G_FMUL %cst, %0
43+
$s0 = COPY %mul
44+
RET_ReallyLR
45+
...
46+
---
47+
name: fmul_vector
48+
tracksRegLiveness: true
49+
body: |
50+
bb.1:
51+
liveins: $s0
52+
53+
; CHECK-LABEL: name: fmul_vector
54+
; CHECK: liveins: $s0
55+
; CHECK-NEXT: {{ $}}
56+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
57+
; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
58+
; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32)
59+
; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL [[COPY]], %cst
60+
; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
61+
; CHECK-NEXT: RET_ReallyLR
62+
%0:_(<4 x s32>) = COPY $q0
63+
%cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
64+
%cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar, %cst_scalar, %cst_scalar, %cst_scalar
65+
%mul:_(<4 x s32>) = G_FMUL %cst, %0
66+
$q0 = COPY %mul
67+
RET_ReallyLR
68+
...
69+
---
70+
name: fmul_splat_with_undef
71+
tracksRegLiveness: true
72+
body: |
73+
bb.1:
74+
liveins: $q0
75+
76+
; CHECK-LABEL: name: fmul_splat_with_undef
77+
; CHECK: liveins: $q0
78+
; CHECK-NEXT: {{ $}}
79+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
80+
; CHECK-NEXT: %undef:_(s32) = G_IMPLICIT_DEF
81+
; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
82+
; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %undef(s32), %undef(s32), %cst_scalar(s32), %cst_scalar(s32)
83+
; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL [[COPY]], %cst
84+
; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
85+
; CHECK-NEXT: RET_ReallyLR
86+
%0:_(<4 x s32>) = COPY $q0
87+
%undef:_(s32) = G_IMPLICIT_DEF
88+
%cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
89+
%cst:_(<4 x s32>) = G_BUILD_VECTOR %undef, %undef, %cst_scalar, %cst_scalar
90+
%mul:_(<4 x s32>) = G_FMUL %cst, %0
91+
$q0 = COPY %mul
92+
RET_ReallyLR
93+
...
94+
---
95+
name: fmul_vector_nonsplat
96+
tracksRegLiveness: true
97+
body: |
98+
bb.1:
99+
liveins: $s0
100+
101+
; CHECK-LABEL: name: fmul_vector_nonsplat
102+
; CHECK: liveins: $s0
103+
; CHECK-NEXT: {{ $}}
104+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
105+
; CHECK-NEXT: %scalar:_(s32) = COPY $s0
106+
; CHECK-NEXT: %cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
107+
; CHECK-NEXT: %cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar(s32), %cst_scalar(s32), %cst_scalar(s32), %scalar(s32)
108+
; CHECK-NEXT: %mul:_(<4 x s32>) = G_FMUL %cst, [[COPY]]
109+
; CHECK-NEXT: $q0 = COPY %mul(<4 x s32>)
110+
; CHECK-NEXT: RET_ReallyLR
111+
%0:_(<4 x s32>) = COPY $q0
112+
%scalar:_(s32) = COPY $s0
113+
%cst_scalar:_(s32) = G_FCONSTANT float 2.000000e+00
114+
%cst:_(<4 x s32>) = G_BUILD_VECTOR %cst_scalar, %cst_scalar, %cst_scalar, %scalar
115+
%mul:_(<4 x s32>) = G_FMUL %cst, %0
116+
$q0 = COPY %mul
117+
RET_ReallyLR
118+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1728,7 +1728,7 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
17281728
; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
17291729
; GFX8-NEXT: v_mov_b32_e32 v2, 0x3c00
17301730
; GFX8-NEXT: v_mul_f16_e32 v1, 1.0, v1
1731-
; GFX8-NEXT: v_mul_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1731+
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
17321732
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
17331733
; GFX8-NEXT: s_setpc_b64 s[30:31]
17341734
;
@@ -1789,7 +1789,7 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
17891789
; GFX8-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
17901790
; GFX8-NEXT: v_mov_b32_e32 v2, 0x3c00
17911791
; GFX8-NEXT: v_mul_f16_e32 v1, 1.0, v1
1792-
; GFX8-NEXT: v_mul_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1792+
; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
17931793
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
17941794
; GFX8-NEXT: s_setpc_b64 s[30:31]
17951795
;

llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
440440
; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
441441
; GCN-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
442442
; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
443-
; GCN-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
443+
; GCN-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
444444
; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
445445
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
446446
; GCN-NEXT: s_setpc_b64 s[30:31]
@@ -453,7 +453,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
453453
; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
454454
; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
455455
; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
456-
; GFX10-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
456+
; GFX10-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
457457
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
458458
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
459459
; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -469,7 +469,7 @@ define double @v_rcp_f64_arcp_afn(double %x) {
469469
; GFX11-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
470470
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
471471
; GFX11-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
472-
; GFX11-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3]
472+
; GFX11-NEXT: v_mul_f64 v[4:5], v[2:3], 1.0
473473
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
474474
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
475475
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
@@ -1436,8 +1436,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
14361436
; GCN-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
14371437
; GCN-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
14381438
; GCN-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1439-
; GCN-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
1440-
; GCN-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
1439+
; GCN-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
1440+
; GCN-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
14411441
; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
14421442
; GCN-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
14431443
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -1457,8 +1457,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
14571457
; GFX10-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
14581458
; GFX10-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
14591459
; GFX10-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1460-
; GFX10-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
1461-
; GFX10-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
1460+
; GFX10-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
1461+
; GFX10-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
14621462
; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
14631463
; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
14641464
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
@@ -1483,8 +1483,8 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
14831483
; GFX11-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
14841484
; GFX11-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
14851485
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
1486-
; GFX11-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5]
1487-
; GFX11-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7]
1486+
; GFX11-NEXT: v_mul_f64 v[8:9], v[4:5], 1.0
1487+
; GFX11-NEXT: v_mul_f64 v[10:11], v[6:7], 1.0
14881488
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
14891489
; GFX11-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
14901490
; GFX11-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0

0 commit comments

Comments
 (0)