Skip to content

Commit 6b37a65

Browse files
committed
[GlobalISel][AArch64] Combine unmerge(G_EXT v, undef) to unmerge(v).
Given <N x t> d1, unused = unmerge(G_EXT <2*N x t> v1, undef, N), the same result can be expressed simply as unused, d1 = unmerge v1. This is useful for tackling the regressions in arm64-vcvt_f.ll that were introduced in https://reviews.llvm.org/D144670.
1 parent d0e6fd9 commit 6b37a65

File tree

5 files changed

+93
-64
lines changed

5 files changed

+93
-64
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,14 @@ def vector_sext_inreg_to_shift : GICombineRule<
206206
(apply [{ applyVectorSextInReg(*${d}, MRI, B, Observer); }])
207207
>;
208208

209+
// Combine rule rooted at G_UNMERGE_VALUES. The C++ predicate
// matchUnmergeExtToUnmerge decides whether the rule fires and fills in the
// Register match data; that register is then handed to
// applyUnmergeExtToUnmerge, which performs the rewrite.
def unmerge_ext_to_unmerge_matchdata : GIDefMatchData<"Register">;
210+
def unmerge_ext_to_unmerge : GICombineRule<
211+
(defs root:$d, unmerge_ext_to_unmerge_matchdata:$matchinfo),
212+
(match (wip_match_opcode G_UNMERGE_VALUES):$d,
213+
[{ return matchUnmergeExtToUnmerge(*${d}, MRI, ${matchinfo}); }]),
214+
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
215+
>;
216+
209217
// Post-legalization combines which should happen at all optimization levels.
210218
// (E.g. ones that facilitate matching for the selector) For example, matching
211219
// pseudos.
@@ -214,7 +222,8 @@ def AArch64PostLegalizerLowering
214222
[shuffle_vector_lowering, vashr_vlshr_imm,
215223
icmp_lowering, build_vector_lowering,
216224
lower_vector_fcmp, form_truncstore,
217-
vector_sext_inreg_to_shift]> {
225+
vector_sext_inreg_to_shift,
226+
unmerge_ext_to_unmerge]> {
218227
}
219228

220229
// Post-legalization combines which are primarily optimizations.

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,53 @@ void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
10661066
Helper.lower(MI, 0, /* Unused hint type */ LLT());
10671067
}
10681068

1069+
/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
1070+
/// => unused, <N x t> = unmerge v
1071+
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
1072+
Register &MatchInfo) {
1073+
assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
1074+
if (MI.getNumDefs() != 2)
1075+
return false;
1076+
if (!MRI.use_nodbg_empty(MI.getOperand(1).getReg()))
1077+
return false;
1078+
1079+
LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
1080+
if (!DstTy.isVector())
1081+
return false;
1082+
1083+
MachineInstr *Ext = getDefIgnoringCopies(
1084+
MI.getOperand(MI.getNumExplicitDefs()).getReg(), MRI);
1085+
if (!Ext || Ext->getOpcode() != AArch64::G_EXT)
1086+
return false;
1087+
1088+
Register ExtSrc1 = Ext->getOperand(1).getReg();
1089+
Register ExtSrc2 = Ext->getOperand(2).getReg();
1090+
auto LowestVal =
1091+
getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
1092+
if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
1093+
return false;
1094+
1095+
MachineInstr *Undef = getDefIgnoringCopies(ExtSrc2, MRI);
1096+
if (!Undef)
1097+
return false;
1098+
1099+
MatchInfo = ExtSrc1;
1100+
1101+
return Undef->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
1102+
}
1103+
1104+
/// Rewrite a two-result G_UNMERGE_VALUES in place: exchange its two
/// destination registers and make it read from \p SrcReg.
///
/// \param MI       The G_UNMERGE_VALUES instruction to modify.
/// \param MRI      Unused here.
/// \param B        Unused here.
/// \param Observer Notified immediately before and after \p MI is modified.
/// \param SrcReg   Register installed as operand 2 of \p MI.
void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);

  auto &FirstDef = MI.getOperand(0);
  auto &SecondDef = MI.getOperand(1);
  auto &Source = MI.getOperand(2);

  // Exchange the two results: remember the first before overwriting it.
  const Register OldFirst = FirstDef.getReg();
  FirstDef.setReg(SecondDef.getReg());
  SecondDef.setReg(OldFirst);

  Source.setReg(SrcReg);

  Observer.changedInstr(MI);
}
1115+
10691116
class AArch64PostLegalizerLoweringImpl : public Combiner {
10701117
protected:
10711118
// TODO: Make CombinerHelper methods const.

llvm/test/CodeGen/AArch64/arm64-neon-add-pairwise.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ define i32 @addp_v4i32(<4 x i32> %a, <4 x i32> %b) {
137137
; CHECK-GI-LABEL: addp_v4i32:
138138
; CHECK-GI: // %bb.0:
139139
; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
140-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
140+
; CHECK-GI-NEXT: mov d1, v0.d[1]
141141
; CHECK-GI-NEXT: addp v0.2s, v0.2s, v1.2s
142142
; CHECK-GI-NEXT: rev64 v1.2s, v0.2s
143143
; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
@@ -164,7 +164,7 @@ define <4 x i16> @addp_v8i16(<8 x i16> %a, <8 x i16> %b) {
164164
; CHECK-GI-LABEL: addp_v8i16:
165165
; CHECK-GI: // %bb.0:
166166
; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
167-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
167+
; CHECK-GI-NEXT: mov d1, v0.d[1]
168168
; CHECK-GI-NEXT: addp v0.4h, v0.4h, v1.4h
169169
; CHECK-GI-NEXT: ret
170170
%1 = add <8 x i16> %a, %b
@@ -185,7 +185,7 @@ define <8 x i8> @addp_v16i8(<16 x i8> %a, <16 x i8> %b) {
185185
; CHECK-GI-LABEL: addp_v16i8:
186186
; CHECK-GI: // %bb.0:
187187
; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
188-
; CHECK-GI-NEXT: ext v1.16b, v0.16b, v0.16b, #8
188+
; CHECK-GI-NEXT: mov d1, v0.d[1]
189189
; CHECK-GI-NEXT: addp v0.8b, v0.8b, v1.8b
190190
; CHECK-GI-NEXT: ret
191191
%1 = add <16 x i8> %a, %b

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 26 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,7 @@ define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind {
7171
; CHECK-GI: // %bb.0:
7272
; CHECK-GI-NEXT: ldr q0, [x0]
7373
; CHECK-GI-NEXT: ldr q1, [x1]
74-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
75-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
76-
; CHECK-GI-NEXT: sabdl.8h v0, v0, v1
74+
; CHECK-GI-NEXT: sabdl2.8h v0, v0, v1
7775
; CHECK-GI-NEXT: ret
7876
%load1 = load <16 x i8>, ptr %A
7977
%load2 = load <16 x i8>, ptr %B
@@ -96,9 +94,7 @@ define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind {
9694
; CHECK-GI: // %bb.0:
9795
; CHECK-GI-NEXT: ldr q0, [x0]
9896
; CHECK-GI-NEXT: ldr q1, [x1]
99-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
100-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
101-
; CHECK-GI-NEXT: sabdl.4s v0, v0, v1
97+
; CHECK-GI-NEXT: sabdl2.4s v0, v0, v1
10298
; CHECK-GI-NEXT: ret
10399
%load1 = load <8 x i16>, ptr %A
104100
%load2 = load <8 x i16>, ptr %B
@@ -121,9 +117,7 @@ define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind {
121117
; CHECK-GI: // %bb.0:
122118
; CHECK-GI-NEXT: ldr q0, [x0]
123119
; CHECK-GI-NEXT: ldr q1, [x1]
124-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
125-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
126-
; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
120+
; CHECK-GI-NEXT: sabdl2.2d v0, v0, v1
127121
; CHECK-GI-NEXT: ret
128122
%load1 = load <4 x i32>, ptr %A
129123
%load2 = load <4 x i32>, ptr %B
@@ -188,9 +182,7 @@ define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind {
188182
; CHECK-GI: // %bb.0:
189183
; CHECK-GI-NEXT: ldr q0, [x0]
190184
; CHECK-GI-NEXT: ldr q1, [x1]
191-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
192-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
193-
; CHECK-GI-NEXT: uabdl.8h v0, v0, v1
185+
; CHECK-GI-NEXT: uabdl2.8h v0, v0, v1
194186
; CHECK-GI-NEXT: ret
195187
%load1 = load <16 x i8>, ptr %A
196188
%load2 = load <16 x i8>, ptr %B
@@ -214,9 +206,7 @@ define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind {
214206
; CHECK-GI: // %bb.0:
215207
; CHECK-GI-NEXT: ldr q0, [x0]
216208
; CHECK-GI-NEXT: ldr q1, [x1]
217-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
218-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
219-
; CHECK-GI-NEXT: uabdl.4s v0, v0, v1
209+
; CHECK-GI-NEXT: uabdl2.4s v0, v0, v1
220210
; CHECK-GI-NEXT: ret
221211
%load1 = load <8 x i16>, ptr %A
222212
%load2 = load <8 x i16>, ptr %B
@@ -239,9 +229,7 @@ define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind {
239229
; CHECK-GI: // %bb.0:
240230
; CHECK-GI-NEXT: ldr q0, [x0]
241231
; CHECK-GI-NEXT: ldr q1, [x1]
242-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
243-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
244-
; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
232+
; CHECK-GI-NEXT: uabdl2.2d v0, v0, v1
245233
; CHECK-GI-NEXT: ret
246234
%load1 = load <4 x i32>, ptr %A
247235
%load2 = load <4 x i32>, ptr %B
@@ -1132,12 +1120,10 @@ define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
11321120
;
11331121
; CHECK-GI-LABEL: sabal2_8h:
11341122
; CHECK-GI: // %bb.0:
1135-
; CHECK-GI-NEXT: ldr q0, [x0]
1136-
; CHECK-GI-NEXT: ldr q1, [x1]
1137-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1138-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1123+
; CHECK-GI-NEXT: ldr q1, [x0]
1124+
; CHECK-GI-NEXT: ldr q2, [x1]
11391125
; CHECK-GI-NEXT: ldr q0, [x2]
1140-
; CHECK-GI-NEXT: sabal.8h v0, v2, v1
1126+
; CHECK-GI-NEXT: sabal2.8h v0, v1, v2
11411127
; CHECK-GI-NEXT: ret
11421128
%load1 = load <16 x i8>, ptr %A
11431129
%load2 = load <16 x i8>, ptr %B
@@ -1161,12 +1147,10 @@ define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
11611147
;
11621148
; CHECK-GI-LABEL: sabal2_4s:
11631149
; CHECK-GI: // %bb.0:
1164-
; CHECK-GI-NEXT: ldr q0, [x0]
1165-
; CHECK-GI-NEXT: ldr q1, [x1]
1166-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1167-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1150+
; CHECK-GI-NEXT: ldr q1, [x0]
1151+
; CHECK-GI-NEXT: ldr q2, [x1]
11681152
; CHECK-GI-NEXT: ldr q0, [x2]
1169-
; CHECK-GI-NEXT: sabal.4s v0, v2, v1
1153+
; CHECK-GI-NEXT: sabal2.4s v0, v1, v2
11701154
; CHECK-GI-NEXT: ret
11711155
%load1 = load <8 x i16>, ptr %A
11721156
%load2 = load <8 x i16>, ptr %B
@@ -1190,12 +1174,10 @@ define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
11901174
;
11911175
; CHECK-GI-LABEL: sabal2_2d:
11921176
; CHECK-GI: // %bb.0:
1193-
; CHECK-GI-NEXT: ldr q0, [x0]
1194-
; CHECK-GI-NEXT: ldr q1, [x1]
1195-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1196-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1177+
; CHECK-GI-NEXT: ldr q1, [x0]
1178+
; CHECK-GI-NEXT: ldr q2, [x1]
11971179
; CHECK-GI-NEXT: ldr q0, [x2]
1198-
; CHECK-GI-NEXT: sabal.2d v0, v2, v1
1180+
; CHECK-GI-NEXT: sabal2.2d v0, v1, v2
11991181
; CHECK-GI-NEXT: ret
12001182
%load1 = load <4 x i32>, ptr %A
12011183
%load2 = load <4 x i32>, ptr %B
@@ -1270,12 +1252,10 @@ define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind {
12701252
;
12711253
; CHECK-GI-LABEL: uabal2_8h:
12721254
; CHECK-GI: // %bb.0:
1273-
; CHECK-GI-NEXT: ldr q0, [x0]
1274-
; CHECK-GI-NEXT: ldr q1, [x1]
1275-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1276-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1255+
; CHECK-GI-NEXT: ldr q1, [x0]
1256+
; CHECK-GI-NEXT: ldr q2, [x1]
12771257
; CHECK-GI-NEXT: ldr q0, [x2]
1278-
; CHECK-GI-NEXT: uabal.8h v0, v2, v1
1258+
; CHECK-GI-NEXT: uabal2.8h v0, v1, v2
12791259
; CHECK-GI-NEXT: ret
12801260
%load1 = load <16 x i8>, ptr %A
12811261
%load2 = load <16 x i8>, ptr %B
@@ -1299,12 +1279,10 @@ define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind {
12991279
;
13001280
; CHECK-GI-LABEL: uabal2_4s:
13011281
; CHECK-GI: // %bb.0:
1302-
; CHECK-GI-NEXT: ldr q0, [x0]
1303-
; CHECK-GI-NEXT: ldr q1, [x1]
1304-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1305-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1282+
; CHECK-GI-NEXT: ldr q1, [x0]
1283+
; CHECK-GI-NEXT: ldr q2, [x1]
13061284
; CHECK-GI-NEXT: ldr q0, [x2]
1307-
; CHECK-GI-NEXT: uabal.4s v0, v2, v1
1285+
; CHECK-GI-NEXT: uabal2.4s v0, v1, v2
13081286
; CHECK-GI-NEXT: ret
13091287
%load1 = load <8 x i16>, ptr %A
13101288
%load2 = load <8 x i16>, ptr %B
@@ -1328,12 +1306,10 @@ define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind {
13281306
;
13291307
; CHECK-GI-LABEL: uabal2_2d:
13301308
; CHECK-GI: // %bb.0:
1331-
; CHECK-GI-NEXT: ldr q0, [x0]
1332-
; CHECK-GI-NEXT: ldr q1, [x1]
1333-
; CHECK-GI-NEXT: ext.16b v2, v0, v0, #8
1334-
; CHECK-GI-NEXT: ext.16b v1, v1, v0, #8
1309+
; CHECK-GI-NEXT: ldr q1, [x0]
1310+
; CHECK-GI-NEXT: ldr q2, [x1]
13351311
; CHECK-GI-NEXT: ldr q0, [x2]
1336-
; CHECK-GI-NEXT: uabal.2d v0, v2, v1
1312+
; CHECK-GI-NEXT: uabal2.2d v0, v1, v2
13371313
; CHECK-GI-NEXT: ret
13381314
%load1 = load <4 x i32>, ptr %A
13391315
%load2 = load <4 x i32>, ptr %B
@@ -1607,7 +1583,7 @@ define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
16071583
; CHECK-GI-LABEL: uabdl2_from_extract_dup:
16081584
; CHECK-GI: // %bb.0:
16091585
; CHECK-GI-NEXT: dup.2s v1, w0
1610-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
1586+
; CHECK-GI-NEXT: mov d0, v0[1]
16111587
; CHECK-GI-NEXT: uabdl.2d v0, v0, v1
16121588
; CHECK-GI-NEXT: ret
16131589
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0
@@ -1642,7 +1618,7 @@ define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) {
16421618
; CHECK-GI-LABEL: sabdl2_from_extract_dup:
16431619
; CHECK-GI: // %bb.0:
16441620
; CHECK-GI-NEXT: dup.2s v1, w0
1645-
; CHECK-GI-NEXT: ext.16b v0, v0, v0, #8
1621+
; CHECK-GI-NEXT: mov d0, v0[1]
16461622
; CHECK-GI-NEXT: sabdl.2d v0, v0, v1
16471623
; CHECK-GI-NEXT: ret
16481624
%rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0

llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ss
3131
;
3232
; GISEL-LABEL: test_vcvt_high_f64_f32:
3333
; GISEL: // %bb.0:
34-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
35-
; GISEL-NEXT: fcvtl v0.2d, v0.2s
34+
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
3635
; GISEL-NEXT: ret
3736
%cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3>
3837
%vcvt1.i = fpext <2 x float> %cvt_in to <2 x double>
@@ -80,8 +79,7 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea
8079
;
8180
; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast:
8281
; GISEL: // %bb.0:
83-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
84-
; GISEL-NEXT: fcvtl v0.2d, v0.2s
82+
; GISEL-NEXT: fcvtl2 v0.2d, v0.4s
8583
; GISEL-NEXT: ret
8684
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
8785
%bc2 = bitcast <2 x i32> %ext to <2 x float>
@@ -97,7 +95,7 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea
9795
;
9896
; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast:
9997
; GISEL: // %bb.0:
100-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
98+
; GISEL-NEXT: mov d0, v0[1]
10199
; GISEL-NEXT: fcvtl v0.2d, v0.2s
102100
; GISEL-NEXT: ret
103101
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -114,7 +112,7 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read
114112
;
115113
; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast:
116114
; GISEL: // %bb.0:
117-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
115+
; GISEL-NEXT: mov d0, v0[1]
118116
; GISEL-NEXT: fcvtl v0.2d, v0.2s
119117
; GISEL-NEXT: ret
120118
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -147,7 +145,7 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read
147145
;
148146
; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast:
149147
; GISEL: // %bb.0:
150-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
148+
; GISEL-NEXT: mov d0, v0[1]
151149
; GISEL-NEXT: fcvtl v0.4s, v0.4h
152150
; GISEL-NEXT: ret
153151
%ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
@@ -164,8 +162,7 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read
164162
;
165163
; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast:
166164
; GISEL: // %bb.0:
167-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
168-
; GISEL-NEXT: fcvtl v0.4s, v0.4h
165+
; GISEL-NEXT: fcvtl2 v0.4s, v0.8h
169166
; GISEL-NEXT: ret
170167
%ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
171168
%bc2 = bitcast <4 x i16> %ext to <4 x half>
@@ -181,7 +178,7 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn
181178
;
182179
; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast:
183180
; GISEL: // %bb.0:
184-
; GISEL-NEXT: ext.16b v0, v0, v0, #8
181+
; GISEL-NEXT: mov d0, v0[1]
185182
; GISEL-NEXT: fcvtl v0.4s, v0.4h
186183
; GISEL-NEXT: ret
187184
%ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

0 commit comments

Comments
 (0)