Commit c432936

[SelectionDAG][RISCV] Use VP_STORE to widen MSTORE in type legalization when possible. (#140991)
Widening the mask and padding it with zeros doesn't work for scalable vectors, and using VL produces less code for fixed vectors. A similar change was recently made for MLOAD.
1 parent f578f56 · commit c432936
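As an illustration of the fixed-vector savings (a sketch of mine, not part of the commit; the function names @before and @after_widening are hypothetical), the widening can be written out in LLVM IR. A masked store of 7 floats must be widened to 8 lanes; instead of padding the widened mask with zeros, the new lowering inserts the mask into an undef wide mask and pins the explicit vector length (EVL) to 7, so the eighth lane is never stored:

; Input: the operation the type legalizer has to widen.
declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>)

define void @before(<7 x float> %val, ptr %a, <7 x i1> %mask) {
  call void @llvm.masked.store.v7f32.p0(<7 x float> %val, ptr %a, i32 8, <7 x i1> %mask)
  ret void
}

; Roughly what the widened VP_STORE node expresses, assuming %wval is the
; already-widened value: the padding mask lane stays poison rather than being
; zeroed, and EVL = 7 limits the store to the original elements.
declare void @llvm.vp.store.v8f32.p0(<8 x float>, ptr, <8 x i1>, i32)

define void @after_widening(<8 x float> %wval, ptr %a, <7 x i1> %mask) {
  %wmask = shufflevector <7 x i1> %mask, <7 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
  call void @llvm.vp.store.v8f32.p0(<8 x float> %wval, ptr %a, <8 x i1> %wmask, i32 7)
  ret void
}

This is why the v7f32 and v7i8 tests below drop the li/vsetivli/vmv.s.x/vmand.mm mask-padding sequence in favor of a single vsetivli with AVL 7.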

File tree: 4 files changed, +72 -80 lines

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Lines changed: 27 additions & 11 deletions

@@ -7316,32 +7316,48 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
   SDValue Mask = MST->getMask();
   EVT MaskVT = Mask.getValueType();
   SDValue StVal = MST->getValue();
+  EVT VT = StVal.getValueType();
   SDLoc dl(N);
 
+  EVT WideVT, WideMaskVT;
   if (OpNo == 1) {
     // Widen the value.
     StVal = GetWidenedVector(StVal);
 
+    WideVT = StVal.getValueType();
+    WideMaskVT =
+        EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
+                         WideVT.getVectorElementCount());
+  } else {
+    WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
+
+    EVT ValueVT = StVal.getValueType();
+    WideVT = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
+                              WideMaskVT.getVectorElementCount());
+  }
+
+  if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+      TLI.isTypeLegal(WideMaskVT)) {
+    Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
+    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
+                                      VT.getVectorElementCount());
+    return DAG.getStoreVP(MST->getChain(), dl, StVal, MST->getBasePtr(),
+                          MST->getOffset(), Mask, EVL, MST->getMemoryVT(),
+                          MST->getMemOperand(), MST->getAddressingMode());
+  }
+
+  if (OpNo == 1) {
     // The mask should be widened as well.
-    EVT WideVT = StVal.getValueType();
-    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
-                                      MaskVT.getVectorElementType(),
-                                      WideVT.getVectorNumElements());
     Mask = ModifyToType(Mask, WideMaskVT, true);
   } else {
     // Widen the mask.
-    EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
     Mask = ModifyToType(Mask, WideMaskVT, true);
 
-    EVT ValueVT = StVal.getValueType();
-    EVT WideVT = EVT::getVectorVT(*DAG.getContext(),
-                                  ValueVT.getVectorElementType(),
-                                  WideMaskVT.getVectorNumElements());
     StVal = ModifyToType(StVal, WideVT);
   }
 
-  assert(Mask.getValueType().getVectorNumElements() ==
-             StVal.getValueType().getVectorNumElements() &&
+  assert(Mask.getValueType().getVectorElementCount() ==
+             StVal.getValueType().getVectorElementCount() &&
          "Mask and data vectors should have the same number of elements");
   return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
                             MST->getOffset(), Mask, MST->getMemoryVT(),
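
The early VP_STORE return above is also what makes the scalable-vector case tractable: padding a widened scalable mask with zeros has no compile-time lane count to pad to, while the VP form only needs an insert into an undef wide mask plus an element count. Here is a hedged LLVM IR sketch of the equivalent rewrite (my illustration, with types taken from the zve32x tests below, where <vscale x 1 x i8> widens to <vscale x 2 x i8>; the function name is hypothetical):

declare void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8>, ptr, <vscale x 2 x i1>, i32)
declare <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1>, <vscale x 1 x i1>, i64)

define void @widened_scalable(<vscale x 2 x i8> %wval, ptr %a, <vscale x 1 x i1> %mask, i32 %evl) {
  ; %evl carries the element count of the original <vscale x 1 x i8> type,
  ; i.e. vscale x 1; on RISC-V it shows up as the csrr vlenb / srli pair in
  ; the new ZVE32 check lines.
  %wmask = call <vscale x 2 x i1> @llvm.vector.insert.nxv2i1.nxv1i1(<vscale x 2 x i1> undef, <vscale x 1 x i1> %mask, i64 0)
  call void @llvm.vp.store.nxv2i8.p0(<vscale x 2 x i8> %wval, ptr %a, <vscale x 2 x i1> %wmask, i32 %evl)
  ret void
}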

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
Lines changed: 1 addition & 4 deletions

@@ -325,10 +325,7 @@ define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) {
 define void @masked_store_v7f32(<7 x float> %val, ptr %a, <7 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v7f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 127
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a1
-; CHECK-NEXT:    vmand.mm v0, v0, v10
+; CHECK-NEXT:    vsetivli zero, 7, e32, m2, ta, ma
 ; CHECK-NEXT:    vse32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   call void @llvm.masked.store.v7f32.p0(<7 x float> %val, ptr %a, i32 8, <7 x i1> %mask)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
Lines changed: 1 addition & 4 deletions

@@ -333,10 +333,7 @@ define void @masked_store_v256i8(<256 x i8> %val, ptr %a, <256 x i1> %mask) {
 define void @masked_store_v7i8(<7 x i8> %val, ptr %a, <7 x i1> %mask) {
 ; CHECK-LABEL: masked_store_v7i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 127
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.s.x v9, a1
-; CHECK-NEXT:    vmand.mm v0, v0, v9
+; CHECK-NEXT:    vsetivli zero, 7, e8, mf2, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
   call void @llvm.masked.store.v7i8.p0(<7 x i8> %val, ptr %a, i32 8, <7 x i1> %mask)

llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
Lines changed: 43 additions & 61 deletions

@@ -1,51 +1,66 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
+; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
 
 define void @masked_store_nxv1i8(<vscale x 1 x i8> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv1i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_store_nxv1i8:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e8, mf8, ta, ma
+; V-NEXT:    vse8.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_store_nxv1i8:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; ZVE32-NEXT:    vse8.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   call void @llvm.masked.store.v1i8.p0(<vscale x 1 x i8> %val, ptr %a, i32 1, <vscale x 1 x i1> %mask)
   ret void
 }
 declare void @llvm.masked.store.v1i8.p0(<vscale x 1 x i8>, ptr, i32, <vscale x 1 x i1>)
 
 define void @masked_store_nxv1i16(<vscale x 1 x i16> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv1i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT:    vse16.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_store_nxv1i16:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; V-NEXT:    vse16.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_store_nxv1i16:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; ZVE32-NEXT:    vse16.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   call void @llvm.masked.store.v1i16.p0(<vscale x 1 x i16> %val, ptr %a, i32 2, <vscale x 1 x i1> %mask)
   ret void
 }
 declare void @llvm.masked.store.v1i16.p0(<vscale x 1 x i16>, ptr, i32, <vscale x 1 x i1>)
 
 define void @masked_store_nxv1i32(<vscale x 1 x i32> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv1i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT:    vse32.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
+; V-LABEL: masked_store_nxv1i32:
+; V:       # %bb.0:
+; V-NEXT:    vsetvli a1, zero, e32, mf2, ta, ma
+; V-NEXT:    vse32.v v8, (a0), v0.t
+; V-NEXT:    ret
+;
+; ZVE32-LABEL: masked_store_nxv1i32:
+; ZVE32:       # %bb.0:
+; ZVE32-NEXT:    csrr a1, vlenb
+; ZVE32-NEXT:    srli a1, a1, 3
+; ZVE32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; ZVE32-NEXT:    vse32.v v8, (a0), v0.t
+; ZVE32-NEXT:    ret
   call void @llvm.masked.store.v1i32.p0(<vscale x 1 x i32> %val, ptr %a, i32 4, <vscale x 1 x i1> %mask)
   ret void
 }
 declare void @llvm.masked.store.v1i32.p0(<vscale x 1 x i32>, ptr, i32, <vscale x 1 x i1>)
 
-define void @masked_store_nxv1i64(<vscale x 1 x i64> %val, ptr %a, <vscale x 1 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv1i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  call void @llvm.masked.store.v1i64.p0(<vscale x 1 x i64> %val, ptr %a, i32 8, <vscale x 1 x i1> %mask)
-  ret void
-}
-declare void @llvm.masked.store.v1i64.p0(<vscale x 1 x i64>, ptr, i32, <vscale x 1 x i1>)
-
 define void @masked_store_nxv2i8(<vscale x 2 x i8> %val, ptr %a, <vscale x 2 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv2i8:
 ; CHECK:       # %bb.0:
@@ -79,17 +94,6 @@ define void @masked_store_nxv2i32(<vscale x 2 x i32> %val, ptr %a, <vscale x 2 x
 }
 declare void @llvm.masked.store.v2i32.p0(<vscale x 2 x i32>, ptr, i32, <vscale x 2 x i1>)
 
-define void @masked_store_nxv2i64(<vscale x 2 x i64> %val, ptr %a, <vscale x 2 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv2i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  call void @llvm.masked.store.v2i64.p0(<vscale x 2 x i64> %val, ptr %a, i32 8, <vscale x 2 x i1> %mask)
-  ret void
-}
-declare void @llvm.masked.store.v2i64.p0(<vscale x 2 x i64>, ptr, i32, <vscale x 2 x i1>)
-
 define void @masked_store_nxv4i8(<vscale x 4 x i8> %val, ptr %a, <vscale x 4 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv4i8:
 ; CHECK:       # %bb.0:
@@ -123,17 +127,6 @@ define void @masked_store_nxv4i32(<vscale x 4 x i32> %val, ptr %a, <vscale x 4 x
 }
 declare void @llvm.masked.store.v4i32.p0(<vscale x 4 x i32>, ptr, i32, <vscale x 4 x i1>)
 
-define void @masked_store_nxv4i64(<vscale x 4 x i64> %val, ptr %a, <vscale x 4 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv4i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  call void @llvm.masked.store.v4i64.p0(<vscale x 4 x i64> %val, ptr %a, i32 8, <vscale x 4 x i1> %mask)
-  ret void
-}
-declare void @llvm.masked.store.v4i64.p0(<vscale x 4 x i64>, ptr, i32, <vscale x 4 x i1>)
-
 define void @masked_store_nxv8i8(<vscale x 8 x i8> %val, ptr %a, <vscale x 8 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv8i8:
 ; CHECK:       # %bb.0:
@@ -167,17 +160,6 @@ define void @masked_store_nxv8i32(<vscale x 8 x i32> %val, ptr %a, <vscale x 8 x
 }
 declare void @llvm.masked.store.v8i32.p0(<vscale x 8 x i32>, ptr, i32, <vscale x 8 x i1>)
 
-define void @masked_store_nxv8i64(<vscale x 8 x i64> %val, ptr %a, <vscale x 8 x i1> %mask) nounwind {
-; CHECK-LABEL: masked_store_nxv8i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT:    vse64.v v8, (a0), v0.t
-; CHECK-NEXT:    ret
-  call void @llvm.masked.store.v8i64.p0(<vscale x 8 x i64> %val, ptr %a, i32 8, <vscale x 8 x i1> %mask)
-  ret void
-}
-declare void @llvm.masked.store.v8i64.p0(<vscale x 8 x i64>, ptr, i32, <vscale x 8 x i1>)
-
 define void @masked_store_nxv16i8(<vscale x 16 x i8> %val, ptr %a, <vscale x 16 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv16i8:
 ; CHECK:       # %bb.0:
