Skip to content

Commit 13b7629

Browse files
committed
[GlobalISel][AArch64] Combine unmerge(G_EXT v, undef) to unmerge(v).
Given <N x t> d1, unused = unmerge(G_EXT <2*N x t> v1, undef, N), the same result can be expressed more simply as unused, d1 = unmerge v1. This is useful for tackling the regressions in arm64-vcvt_f.ll that were introduced in https://reviews.llvm.org/D144670.
1 parent 7eeeeb0 commit 13b7629

File tree

6 files changed

+244
-64
lines changed

6 files changed

+244
-64
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,14 @@ def vector_sext_inreg_to_shift : GICombineRule<
206206
(apply [{ applyVectorSextInReg(*${d}, MRI, B, Observer); }])
207207
>;
208208

209+
// Rewrite a two-result G_UNMERGE_VALUES whose source is a G_EXT so that it
// unmerges the G_EXT's first input directly. The match step records that input
// register in $matchinfo; the apply step installs it as the new unmerge source
// and swaps the two unmerge results.
def unmerge_ext_to_unmerge_matchdata : GIDefMatchData<"Register">;
210+
def unmerge_ext_to_unmerge : GICombineRule<
211+
(defs root:$d, unmerge_ext_to_unmerge_matchdata:$matchinfo),
212+
(match (wip_match_opcode G_UNMERGE_VALUES):$d,
213+
[{ return matchUnmergeExtToUnmerge(*${d}, MRI, ${matchinfo}); }]),
214+
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
215+
>;
216+
209217
// Post-legalization combines which should happen at all optimization levels.
210218
// (E.g. ones that facilitate matching for the selector) For example, matching
211219
// pseudos.
@@ -214,7 +222,8 @@ def AArch64PostLegalizerLowering
214222
[shuffle_vector_lowering, vashr_vlshr_imm,
215223
icmp_lowering, build_vector_lowering,
216224
lower_vector_fcmp, form_truncstore,
217-
vector_sext_inreg_to_shift]> {
225+
vector_sext_inreg_to_shift,
226+
unmerge_ext_to_unmerge]> {
218227
}
219228

220229
// Post-legalization combines which are primarily optimizations.

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,50 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
10661066
Helper.lower(MI, 0, /* Unused hint type */ LLT());
10671067
}
10681068

1069+
/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1070+
/// => unused, <N x t> = unmerge v
///
/// Match step. Returns true only when all of the following hold:
///  - \p MI has exactly two results and the second one has no non-debug uses;
///  - the first result has a vector type;
///  - the unmerge source is defined by an AArch64 G_EXT;
///  - the G_EXT's third operand is a known constant equal to the size in bytes
///    of the first result's type;
///  - the G_EXT's second input is defined by a G_IMPLICIT_DEF.
/// On success \p MatchInfo is set to the G_EXT's first input (`v` above);
/// on failure \p MatchInfo is left untouched.
1071+
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1072+
Register &MatchInfo) {
1073+
auto &Unmerge = cast<GUnmerge>(MI);
1074+
// Only an unmerge into exactly two pieces is handled.
if (Unmerge.getNumDefs() != 2)
1075+
return false;
1076+
// The second result is the "unused" one in the pattern; bail out if anything
// other than debug instructions reads it.
if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
1077+
return false;
1078+
1079+
// Type of the first (live) result; the combine is restricted to vectors.
LLT DstTy = MRI.getType(Unmerge.getReg(0));
1080+
if (!DstTy.isVector())
1081+
return false;
1082+
1083+
// The value being unmerged must be produced by a G_EXT.
MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
1084+
if (!Ext)
1085+
return false;
1086+
1087+
Register ExtSrc1 = Ext->getOperand(1).getReg();
1088+
Register ExtSrc2 = Ext->getOperand(2).getReg();
1089+
// The G_EXT's third operand must resolve to a constant that equals the byte
// size of one unmerge result (compared against DstTy.getSizeInBytes()).
auto LowestVal =
1090+
getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1091+
if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1092+
return false;
1093+
1094+
// The second G_EXT input must be undefined (the `undef` in the pattern).
if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
1095+
return false;
1096+
1097+
// Hand the first G_EXT input to the apply step as the new unmerge source.
MatchInfo = ExtSrc1;
1098+
return true;
1099+
}
1100+
1101+
/// Apply step for the unmerge(G_EXT v, undef, N) => unmerge(v) combine.
///
/// Mutates \p MI in place: its two destination registers (operands 0 and 1)
/// are exchanged and its source (operand 2) is replaced with \p SrcReg.
/// \p Observer is notified before and after the mutation. \p MRI and \p B are
/// not used by this function. No instruction is erased here.
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1102+
MachineIRBuilder &B,
1103+
GISelChangeObserver &Observer, Register &SrcReg) {
1104+
Observer.changingInstr(MI);
1105+
// Swap dst registers.
1106+
// Save operand 0 first because the next statement overwrites it.
Register Dst1 = MI.getOperand(0).getReg();
1107+
MI.getOperand(0).setReg(MI.getOperand(1).getReg());
1108+
MI.getOperand(1).setReg(Dst1);
1109+
// Unmerge SrcReg directly instead of the previous source register.
MI.getOperand(2).setReg(SrcReg);
1110+
Observer.changedInstr(MI);
1111+
}
1112+
10691113
class AArch64PostLegalizerLoweringImpl : public Combiner {
10701114
protected:
10711115
// TODO: Make CombinerHelper methods const.
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-lowering -global-isel -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
# Positive case, <4 x s32>: G_EXT of %v1 and an implicit def by constant 8
# feeds a two-result unmerge whose second result is unused. The CHECK lines
# expect an unmerge of %v1 with the two results swapped and no G_EXT.
---
5+
name: v4s32
6+
legalized: true
7+
body: |
8+
bb.0.entry:
9+
liveins: $q0
10+
; CHECK-LABEL: name: v4s32
11+
; CHECK: liveins: $q0
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
14+
; CHECK-NEXT: %unused:_(<2 x s32>), %unmerge:_(<2 x s32>) = G_UNMERGE_VALUES %v1(<4 x s32>)
15+
; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
16+
; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
17+
; CHECK-NEXT: RET_ReallyLR implicit $q0
18+
%v1:_(<4 x s32>) = COPY $q0
19+
%implicit:_(<4 x s32>) = G_IMPLICIT_DEF
20+
%C:_(s32) = G_CONSTANT i32 8
21+
%ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
22+
%unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
23+
%fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
24+
$q0 = COPY %fpext
25+
RET_ReallyLR implicit $q0
26+
...
27+
# Positive case, <8 x s16>: same shape as v4s32 with 16-bit elements. The
# CHECK lines expect an unmerge of %v1 with the results swapped and no G_EXT.
---
28+
name: v8s16
29+
legalized: true
30+
body: |
31+
bb.0.entry:
32+
liveins: $q0
33+
; CHECK-LABEL: name: v8s16
34+
; CHECK: liveins: $q0
35+
; CHECK-NEXT: {{ $}}
36+
; CHECK-NEXT: %v1:_(<8 x s16>) = COPY $q0
37+
; CHECK-NEXT: %unused:_(<4 x s16>), %unmerge:_(<4 x s16>) = G_UNMERGE_VALUES %v1(<8 x s16>)
38+
; CHECK-NEXT: %fpext:_(<4 x s32>) = G_FPEXT %unmerge(<4 x s16>)
39+
; CHECK-NEXT: $q0 = COPY %fpext(<4 x s32>)
40+
; CHECK-NEXT: RET_ReallyLR implicit $q0
41+
%v1:_(<8 x s16>) = COPY $q0
42+
%implicit:_(<8 x s16>) = G_IMPLICIT_DEF
43+
%C:_(s32) = G_CONSTANT i32 8
44+
%ext:_(<8 x s16>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
45+
%unmerge:_(<4 x s16>), %unused:_(<4 x s16>) = G_UNMERGE_VALUES %ext:_(<8 x s16>)
46+
%fpext:_(<4 x s32>) = G_FPEXT %unmerge:_(<4 x s16>)
47+
$q0 = COPY %fpext
48+
RET_ReallyLR implicit $q0
49+
...
50+
# Positive case, <16 x s8>: same shape as v4s32 with 8-bit elements. The
# CHECK lines expect an unmerge of %v1 with the results swapped and no G_EXT.
---
51+
name: v16s8
52+
legalized: true
53+
body: |
54+
bb.0.entry:
55+
liveins: $q0
56+
; CHECK-LABEL: name: v16s8
57+
; CHECK: liveins: $q0
58+
; CHECK-NEXT: {{ $}}
59+
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
60+
; CHECK-NEXT: %unused:_(<8 x s8>), %unmerge:_(<8 x s8>) = G_UNMERGE_VALUES %v1(<16 x s8>)
61+
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
62+
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
63+
; CHECK-NEXT: RET_ReallyLR implicit $q0
64+
%v1:_(<16 x s8>) = COPY $q0
65+
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
66+
%C:_(s32) = G_CONSTANT i32 8
67+
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
68+
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
69+
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
70+
$q0 = COPY %fpext
71+
RET_ReallyLR implicit $q0
72+
...
73+
# Negative case: the G_EXT's third operand is a COPY of $w0 rather than a
# G_CONSTANT. The CHECK lines expect the G_EXT and the unmerge to be left
# unchanged.
---
74+
name: skip_not_const
75+
legalized: true
76+
body: |
77+
bb.0.entry:
78+
liveins: $q0, $w0
79+
; CHECK-LABEL: name: skip_not_const
80+
; CHECK: liveins: $q0, $w0
81+
; CHECK-NEXT: {{ $}}
82+
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
83+
; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
84+
; CHECK-NEXT: %C:_(s32) = COPY $w0
85+
; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
86+
; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
87+
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
88+
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
89+
; CHECK-NEXT: RET_ReallyLR implicit $q0
90+
%v1:_(<16 x s8>) = COPY $q0
91+
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
92+
%C:_(s32) = COPY $w0
93+
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
94+
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
95+
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
96+
$q0 = COPY %fpext
97+
RET_ReallyLR implicit $q0
98+
...
99+
# Negative case: both unmerge results are used (%unused feeds %fpext2). The
# CHECK lines expect the G_EXT and the unmerge to be left unchanged.
---
100+
name: skip_not_unused
101+
legalized: true
102+
body: |
103+
bb.0.entry:
104+
liveins: $q0
105+
; CHECK-LABEL: name: skip_not_unused
106+
; CHECK: liveins: $q0
107+
; CHECK-NEXT: {{ $}}
108+
; CHECK-NEXT: %v1:_(<16 x s8>) = COPY $q0
109+
; CHECK-NEXT: %implicit:_(<16 x s8>) = G_IMPLICIT_DEF
110+
; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 8
111+
; CHECK-NEXT: %ext:_(<16 x s8>) = G_EXT %v1, %implicit, %C(s32)
112+
; CHECK-NEXT: %unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext(<16 x s8>)
113+
; CHECK-NEXT: %fpext:_(<8 x s16>) = G_FPEXT %unmerge(<8 x s8>)
114+
; CHECK-NEXT: %fpext2:_(<8 x s16>) = G_FPEXT %unused(<8 x s8>)
115+
; CHECK-NEXT: $q0 = COPY %fpext(<8 x s16>)
116+
; CHECK-NEXT: $q1 = COPY %fpext2(<8 x s16>)
117+
; CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1
118+
%v1:_(<16 x s8>) = COPY $q0
119+
%implicit:_(<16 x s8>) = G_IMPLICIT_DEF
120+
%C:_(s32) = G_CONSTANT i32 8
121+
%ext:_(<16 x s8>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
122+
%unmerge:_(<8 x s8>), %unused:_(<8 x s8>) = G_UNMERGE_VALUES %ext:_(<16 x s8>)
123+
%fpext:_(<8 x s16>) = G_FPEXT %unmerge:_(<8 x s8>)
124+
%fpext2:_(<8 x s16>) = G_FPEXT %unused:_(<8 x s8>)
125+
$q0 = COPY %fpext
126+
$q1 = COPY %fpext2
127+
RET_ReallyLR implicit $q0, implicit $q1
128+
...
129+
# Negative case: the G_EXT constant is 9, whereas the positive v4s32 case with
# the same types uses 8. The CHECK lines expect the G_EXT and the unmerge to be
# left unchanged.
---
130+
name: skip_borders
131+
legalized: true
132+
body: |
133+
bb.0.entry:
134+
liveins: $q0
135+
; CHECK-LABEL: name: skip_borders
136+
; CHECK: liveins: $q0
137+
; CHECK-NEXT: {{ $}}
138+
; CHECK-NEXT: %v1:_(<4 x s32>) = COPY $q0
139+
; CHECK-NEXT: %implicit:_(<4 x s32>) = G_IMPLICIT_DEF
140+
; CHECK-NEXT: %C:_(s32) = G_CONSTANT i32 9
141+
; CHECK-NEXT: %ext:_(<4 x s32>) = G_EXT %v1, %implicit, %C(s32)
142+
; CHECK-NEXT: %unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext(<4 x s32>)
143+
; CHECK-NEXT: %fpext:_(<2 x s64>) = G_FPEXT %unmerge(<2 x s32>)
144+
; CHECK-NEXT: $q0 = COPY %fpext(<2 x s64>)
145+
; CHECK-NEXT: RET_ReallyLR implicit $q0
146+
%v1:_(<4 x s32>) = COPY $q0
147+
%implicit:_(<4 x s32>) = G_IMPLICIT_DEF
148+
%C:_(s32) = G_CONSTANT i32 9
149+
%ext:_(<4 x s32>) = G_EXT %v1:_, %implicit:_, %C:_(s32)
150+
%unmerge:_(<2 x s32>), %unused:_(<2 x s32>) = G_UNMERGE_VALUES %ext:_(<4 x s32>)
151+
%fpext:_(<2 x s64>) = G_FPEXT %unmerge:_(<2 x s32>)
152+
$q0 = COPY %fpext
153+
RET_ReallyLR implicit $q0
154+
...

llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
137137
; CHECK-GI-LABEL: addp_v4i32:
138138
; CHECK-GI: // %bb.0:
139139
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
140-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
140+
; CHECK-GI-NEXT: mov d1, v0.d[1]
141141
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
142142
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
143143
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
@@ -164,7 +164,7 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
164164
; CHECK-GI-LABEL: addp_v8i16:
165165
; CHECK-GI: // %bb.0:
166166
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
167-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
167+
; CHECK-GI-NEXT: mov d1, v0.d[1]
168168
; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
169169
; CHECK-GI-NEXT: ret
170170
%1 = add <8 x i16> %a, %b
@@ -185,7 +185,7 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
185185
; CHECK-GI-LABEL: addp_v16i8:
186186
; CHECK-GI: // %bb.0:
187187
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
188-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
188+
; CHECK-GI-NEXT: mov d1, v0.d[1]
189189
; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
190190
; CHECK-GI-NEXT: ret
191191
%1 = add <16 x i8> %a, %b

0 commit comments

Comments
 (0)