Commit 22d26ae
[RISCV] Optimize (slli (srli (slli X, C1), C1), C2) -> (srli (slli X, C1), C1-C2) (#119567)
Masking out the most significant bits can be done with an shl followed by an srl with the same shift amount. If that is followed by another shl, we can instead srl by a smaller amount. This transform is already implemented in tablegen for masking out the 32 most significant bits. It emits better code for e.g. float *index(float *p, int i) { return p + (i & (1 << 30) - 1); }
1 parent 0614c60 commit 22d26ae
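
As an illustration (not part of the commit), the rewrite relies on the identity ((X & C2) << C) == ((X << C4) >> (C4 - C)) when C2 is a mask with C4 leading zeros and no trailing zeros. Below is a minimal standalone C++ check of that identity for the mask from the example above ((1 << 30) - 1, i.e. 0x3fffffff, which has C4 = 34 leading zeros in 64 bits) and C = 2 (the pointer scaling for a 4-byte element); it is only a sketch of the arithmetic, not compiler code.

#include <cassert>
#include <cstdint>

int main() {
  const unsigned C = 2;             // outer shift amount
  const unsigned LeadingZeros = 34; // leading zeros of 0x3fffffff in 64 bits
  for (uint64_t x : {0x0ULL, 0x1ULL, 0x3fffffffULL, 0xdeadbeefcafef00dULL}) {
    uint64_t masked = (x & 0x3fffffffULL) << C;                   // (shl (and X, C2), C)
    uint64_t shifted = (x << LeadingZeros) >> (LeadingZeros - C); // (srli (slli X, C4), C4-C)
    assert(masked == shifted); // the two forms agree for all inputs
  }
  return 0;
}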

6 files changed, +674 -600 lines changed
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp

Lines changed: 21 additions & 2 deletions
@@ -1026,13 +1026,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
     unsigned ShAmt = N1C->getZExtValue();
     uint64_t Mask = N0.getConstantOperandVal(1);
 
-    // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
-    // 32 leading zeros and C3 trailing zeros.
     if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
       unsigned XLen = Subtarget->getXLen();
       unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
       unsigned TrailingZeros = llvm::countr_zero(Mask);
       if (TrailingZeros > 0 && LeadingZeros == 32) {
+        // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C)
+        // where C2 has 32 leading zeros and C3 trailing zeros.
         SDNode *SRLIW = CurDAG->getMachineNode(
             RISCV::SRLIW, DL, VT, N0->getOperand(0),
             CurDAG->getTargetConstant(TrailingZeros, DL, VT));
@@ -1042,6 +1042,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
         ReplaceNode(Node, SLLI);
         return;
       }
+      if (TrailingZeros == 0 && LeadingZeros > ShAmt &&
+          XLen - LeadingZeros > 11 && LeadingZeros != 32) {
+        // Optimize (shl (and X, C2), C) -> (srli (slli X, C4), C4-C)
+        // where C2 has C4 leading zeros and no trailing zeros.
+        // This is profitable if the "and" was to be lowered to
+        // (srli (slli X, C4), C4) and not (andi X, C2).
+        // For "LeadingZeros == 32":
+        //   - with Zba it's just (slli.uw X, C)
+        //   - without Zba a tablegen pattern applies the very same
+        //     transform as we would have done here
+        SDNode *SLLI = CurDAG->getMachineNode(
+            RISCV::SLLI, DL, VT, N0->getOperand(0),
+            CurDAG->getTargetConstant(LeadingZeros, DL, VT));
+        SDNode *SRLI = CurDAG->getMachineNode(
+            RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+            CurDAG->getTargetConstant(LeadingZeros - ShAmt, DL, VT));
+        ReplaceNode(Node, SRLI);
+        return;
+      }
     }
     break;
   }
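
For reference (not part of the diff), here is a small standalone sketch of how the new path's shift amounts come out for the first test case in and-shl.ll below (Mask = 0xfff, ShAmt = 2, XLen = 64). Variable names mirror those in Select(); std::bit_width and std::countr_zero stand in for the llvm:: equivalents.

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned XLen = 64;
  const uint64_t Mask = 0xfff;
  const unsigned ShAmt = 2;
  unsigned LeadingZeros = XLen - std::bit_width(Mask); // 52
  unsigned TrailingZeros = std::countr_zero(Mask);     // 0
  bool Applies = TrailingZeros == 0 && LeadingZeros > ShAmt &&
                 XLen - LeadingZeros > 11 && LeadingZeros != 32;
  if (Applies)
    printf("slli x, %u; srli x, %u\n", LeadingZeros, LeadingZeros - ShAmt);
  // Prints: slli x, 52; srli x, 50 -- matching the RV64I check lines below.
  return 0;
}

The XLen - LeadingZeros > 11 condition is why and_0x7ff_shl_2 below keeps the andi: a 0x7ff mask leaves a value that still fits andi's 12-bit signed immediate.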

llvm/test/CodeGen/RISCV/and-shl.ll

Lines changed: 79 additions & 0 deletions
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV64I
+
+define i32 @and_0xfff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a0, a0, 18
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xfff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 52
+; RV64I-NEXT: srli a0, a0, 50
+; RV64I-NEXT: ret
+  %a = and i32 %x, 4095
+  %s = shl i32 %a, 2
+  ret i32 %s
+}
+
+define i32 @and_0x7ff_shl_2(i32 %x) {
+; RV32I-LABEL: and_0x7ff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: andi a0, a0, 2047
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0x7ff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: andi a0, a0, 2047
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: ret
+  %a = and i32 %x, 2047
+  %s = shl i32 %a, 2
+  ret i32 %s
+}
+
+define i64 @and_0xffffffff_shl_2(i64 %x) {
+; RV32I-LABEL: and_0xffffffff_shl_2:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a2, a0, 2
+; RV32I-NEXT: srli a1, a0, 30
+; RV32I-NEXT: mv a0, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xffffffff_shl_2:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 32
+; RV64I-NEXT: srli a0, a0, 30
+; RV64I-NEXT: ret
+  %a = and i64 %x, 4294967295
+  %s = shl i64 %a, 2
+  ret i64 %s
+}
+
+define i32 @and_0xfff_shl_2_multi_use(i32 %x) {
+; RV32I-LABEL: and_0xfff_shl_2_multi_use:
+; RV32I: # %bb.0:
+; RV32I-NEXT: slli a0, a0, 20
+; RV32I-NEXT: srli a0, a0, 20
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: and_0xfff_shl_2_multi_use:
+; RV64I: # %bb.0:
+; RV64I-NEXT: slli a0, a0, 52
+; RV64I-NEXT: srli a0, a0, 52
+; RV64I-NEXT: slli a1, a0, 2
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+  %a = and i32 %x, 4095
+  %s = shl i32 %a, 2
+  %r = add i32 %a, %s
+  ret i32 %r
+}

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll

Lines changed: 84 additions & 92 deletions
@@ -124,42 +124,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32: # %bb.0:
 ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 8
 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 17
+; ZVFH32-NEXT: srli a1, a1, 17
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 17
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 2
+; ZVFH32-NEXT: or a1, a1, a3
 ; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
 ; ZVFH32-NEXT: ret
 ;
 ; ZVFH64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFH64: # %bb.0:
 ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 8
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 49
+; ZVFH64-NEXT: vmv.x.s a2, v8
 ; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 49
+; ZVFH64-NEXT: slli a2, a2, 49
 ; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 34
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
 ; ZVFH64-NEXT: slli a2, a1, 19
 ; ZVFH64-NEXT: srli a2, a2, 51
 ; ZVFH64-NEXT: sw a1, 0(a0)
@@ -170,42 +168,40 @@ define <3 x i15> @fp2si_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32: # %bb.0:
 ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 8
 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 17
+; ZVFHMIN32-NEXT: srli a1, a1, 17
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 17
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 2
+; ZVFHMIN32-NEXT: or a1, a1, a3
 ; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
 ; ZVFHMIN32-NEXT: ret
 ;
 ; ZVFHMIN64-LABEL: fp2si_v3f32_v3i15:
 ; ZVFHMIN64: # %bb.0:
 ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 8
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 49
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 49
+; ZVFHMIN64-NEXT: slli a2, a2, 49
 ; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 34
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
 ; ZVFHMIN64-NEXT: slli a2, a1, 19
 ; ZVFHMIN64-NEXT: srli a2, a2, 51
 ; ZVFHMIN64-NEXT: sw a1, 0(a0)
@@ -221,42 +217,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFH32: # %bb.0:
 ; ZVFH32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH32-NEXT: lui a1, 16
 ; ZVFH32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFH32-NEXT: vmv.x.s a2, v9
-; ZVFH32-NEXT: addi a1, a1, -1
+; ZVFH32-NEXT: vmv.x.s a1, v9
 ; ZVFH32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFH32-NEXT: vmv.x.s a3, v8
-; ZVFH32-NEXT: and a2, a2, a1
-; ZVFH32-NEXT: vmv.x.s a4, v9
-; ZVFH32-NEXT: and a1, a4, a1
-; ZVFH32-NEXT: slli a4, a3, 17
-; ZVFH32-NEXT: slli a3, a3, 30
-; ZVFH32-NEXT: srli a4, a4, 19
-; ZVFH32-NEXT: slli a1, a1, 15
-; ZVFH32-NEXT: or a2, a2, a3
-; ZVFH32-NEXT: or a1, a2, a1
+; ZVFH32-NEXT: vmv.x.s a2, v8
+; ZVFH32-NEXT: slli a1, a1, 16
+; ZVFH32-NEXT: srli a1, a1, 16
+; ZVFH32-NEXT: slli a3, a2, 30
+; ZVFH32-NEXT: or a1, a1, a3
+; ZVFH32-NEXT: vmv.x.s a3, v9
+; ZVFH32-NEXT: slli a2, a2, 17
+; ZVFH32-NEXT: slli a3, a3, 16
+; ZVFH32-NEXT: srli a2, a2, 19
+; ZVFH32-NEXT: srli a3, a3, 1
+; ZVFH32-NEXT: or a1, a1, a3
 ; ZVFH32-NEXT: sw a1, 0(a0)
-; ZVFH32-NEXT: sh a4, 4(a0)
+; ZVFH32-NEXT: sh a2, 4(a0)
 ; ZVFH32-NEXT: ret
 ;
 ; ZVFH64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFH64: # %bb.0:
 ; ZVFH64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFH64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFH64-NEXT: lui a1, 16
-; ZVFH64-NEXT: vmv.x.s a2, v9
-; ZVFH64-NEXT: addiw a1, a1, -1
+; ZVFH64-NEXT: vmv.x.s a1, v9
 ; ZVFH64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFH64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFH64-NEXT: and a2, a2, a1
-; ZVFH64-NEXT: vmv.x.s a3, v8
-; ZVFH64-NEXT: and a1, a3, a1
+; ZVFH64-NEXT: slli a1, a1, 48
+; ZVFH64-NEXT: vmv.x.s a2, v8
 ; ZVFH64-NEXT: vmv.x.s a3, v9
+; ZVFH64-NEXT: srli a1, a1, 48
+; ZVFH64-NEXT: slli a2, a2, 48
 ; ZVFH64-NEXT: slli a3, a3, 30
-; ZVFH64-NEXT: slli a1, a1, 15
-; ZVFH64-NEXT: or a2, a2, a3
-; ZVFH64-NEXT: or a1, a2, a1
+; ZVFH64-NEXT: srli a2, a2, 33
+; ZVFH64-NEXT: or a1, a1, a3
+; ZVFH64-NEXT: or a1, a1, a2
 ; ZVFH64-NEXT: slli a2, a1, 19
 ; ZVFH64-NEXT: srli a2, a2, 51
 ; ZVFH64-NEXT: sw a1, 0(a0)
@@ -267,42 +261,40 @@ define <3 x i15> @fp2ui_v3f32_v3i15(<3 x float> %x) {
 ; ZVFHMIN32: # %bb.0:
 ; ZVFHMIN32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN32-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN32-NEXT: lui a1, 16
 ; ZVFHMIN32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN32-NEXT: vmv.x.s a2, v9
-; ZVFHMIN32-NEXT: addi a1, a1, -1
+; ZVFHMIN32-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN32-NEXT: vslidedown.vi v9, v9, 1
-; ZVFHMIN32-NEXT: vmv.x.s a3, v8
-; ZVFHMIN32-NEXT: and a2, a2, a1
-; ZVFHMIN32-NEXT: vmv.x.s a4, v9
-; ZVFHMIN32-NEXT: and a1, a4, a1
-; ZVFHMIN32-NEXT: slli a4, a3, 17
-; ZVFHMIN32-NEXT: slli a3, a3, 30
-; ZVFHMIN32-NEXT: srli a4, a4, 19
-; ZVFHMIN32-NEXT: slli a1, a1, 15
-; ZVFHMIN32-NEXT: or a2, a2, a3
-; ZVFHMIN32-NEXT: or a1, a2, a1
+; ZVFHMIN32-NEXT: vmv.x.s a2, v8
+; ZVFHMIN32-NEXT: slli a1, a1, 16
+; ZVFHMIN32-NEXT: srli a1, a1, 16
+; ZVFHMIN32-NEXT: slli a3, a2, 30
+; ZVFHMIN32-NEXT: or a1, a1, a3
+; ZVFHMIN32-NEXT: vmv.x.s a3, v9
+; ZVFHMIN32-NEXT: slli a2, a2, 17
+; ZVFHMIN32-NEXT: slli a3, a3, 16
+; ZVFHMIN32-NEXT: srli a2, a2, 19
+; ZVFHMIN32-NEXT: srli a3, a3, 1
+; ZVFHMIN32-NEXT: or a1, a1, a3
 ; ZVFHMIN32-NEXT: sw a1, 0(a0)
-; ZVFHMIN32-NEXT: sh a4, 4(a0)
+; ZVFHMIN32-NEXT: sh a2, 4(a0)
 ; ZVFHMIN32-NEXT: ret
 ;
 ; ZVFHMIN64-LABEL: fp2ui_v3f32_v3i15:
 ; ZVFHMIN64: # %bb.0:
 ; ZVFHMIN64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
 ; ZVFHMIN64-NEXT: vfncvt.rtz.x.f.w v9, v8
-; ZVFHMIN64-NEXT: lui a1, 16
-; ZVFHMIN64-NEXT: vmv.x.s a2, v9
-; ZVFHMIN64-NEXT: addiw a1, a1, -1
+; ZVFHMIN64-NEXT: vmv.x.s a1, v9
 ; ZVFHMIN64-NEXT: vslidedown.vi v8, v9, 1
 ; ZVFHMIN64-NEXT: vslidedown.vi v9, v9, 2
-; ZVFHMIN64-NEXT: and a2, a2, a1
-; ZVFHMIN64-NEXT: vmv.x.s a3, v8
-; ZVFHMIN64-NEXT: and a1, a3, a1
+; ZVFHMIN64-NEXT: slli a1, a1, 48
+; ZVFHMIN64-NEXT: vmv.x.s a2, v8
 ; ZVFHMIN64-NEXT: vmv.x.s a3, v9
+; ZVFHMIN64-NEXT: srli a1, a1, 48
+; ZVFHMIN64-NEXT: slli a2, a2, 48
 ; ZVFHMIN64-NEXT: slli a3, a3, 30
-; ZVFHMIN64-NEXT: slli a1, a1, 15
-; ZVFHMIN64-NEXT: or a2, a2, a3
-; ZVFHMIN64-NEXT: or a1, a2, a1
+; ZVFHMIN64-NEXT: srli a2, a2, 33
+; ZVFHMIN64-NEXT: or a1, a1, a3
+; ZVFHMIN64-NEXT: or a1, a1, a2
 ; ZVFHMIN64-NEXT: slli a2, a1, 19
 ; ZVFHMIN64-NEXT: srli a2, a2, 51
 ; ZVFHMIN64-NEXT: sw a1, 0(a0)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll

Lines changed: 4 additions & 4 deletions
@@ -3296,11 +3296,11 @@ define <4 x i16> @buildvec_v4i16_pack(i16 %e1, i16 %e2, i16 %e3, i16 %e4) {
 ; RVA22U64-LABEL: buildvec_v4i16_pack:
 ; RVA22U64: # %bb.0:
 ; RVA22U64-NEXT: slli a3, a3, 48
-; RVA22U64-NEXT: zext.h a2, a2
+; RVA22U64-NEXT: slli a2, a2, 48
 ; RVA22U64-NEXT: zext.h a0, a0
-; RVA22U64-NEXT: zext.h a1, a1
-; RVA22U64-NEXT: slli a2, a2, 32
-; RVA22U64-NEXT: slli a1, a1, 16
+; RVA22U64-NEXT: slli a1, a1, 48
+; RVA22U64-NEXT: srli a2, a2, 16
+; RVA22U64-NEXT: srli a1, a1, 32
 ; RVA22U64-NEXT: or a2, a2, a3
 ; RVA22U64-NEXT: or a0, a0, a1
 ; RVA22U64-NEXT: or a0, a0, a2
