Skip to content

Commit 60ad6e3

Browse files
authored
[SelectionDAG][RISCV] Use VP_LOAD to widen MLOAD in type legalization when possible. (#140595)
Padding the mask with 0 elements doesn't work for scalable vectors. Use VP_LOAD and change the VL instead. This fixes a crash for Zve32x. The test file was split since i64 isn't a valid element type for Zve32x. Fixes #140198.
1 parent b4d2e50 commit 60ad6e3

File tree

5 files changed

+116
-74
lines changed

5 files changed

+116
-74
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6111,18 +6111,37 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
61116111
}
61126112

61136113
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
6114-
6115-
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
6114+
EVT VT = N->getValueType(0);
6115+
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
61166116
SDValue Mask = N->getMask();
61176117
EVT MaskVT = Mask.getValueType();
61186118
SDValue PassThru = GetWidenedVector(N->getPassThru());
61196119
ISD::LoadExtType ExtType = N->getExtensionType();
61206120
SDLoc dl(N);
61216121

6122+
EVT WideMaskVT =
6123+
EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
6124+
WidenVT.getVectorElementCount());
6125+
6126+
if (ExtType == ISD::NON_EXTLOAD &&
6127+
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
6128+
TLI.isTypeLegal(WideMaskVT)) {
6129+
Mask = DAG.getInsertSubvector(dl, DAG.getUNDEF(WideMaskVT), Mask, 0);
6130+
SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
6131+
VT.getVectorElementCount());
6132+
SDValue NewLoad =
6133+
DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
6134+
N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
6135+
N->getMemoryVT(), N->getMemOperand());
6136+
6137+
// Modified the chain - switch anything that used the old chain to use
6138+
// the new one.
6139+
ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
6140+
6141+
return NewLoad;
6142+
}
6143+
61226144
// The mask should be widened as well
6123-
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
6124-
MaskVT.getVectorElementType(),
6125-
WidenVT.getVectorNumElements());
61266145
Mask = ModifyToType(Mask, WideMaskVT, true);
61276146

61286147
SDValue Res = DAG.getMaskedLoad(

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -325,10 +325,7 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) {
325325
define <7 x float> @masked_load_v7f32(ptr %a, <7 x i1> %mask) {
326326
; CHECK-LABEL: masked_load_v7f32:
327327
; CHECK: # %bb.0:
328-
; CHECK-NEXT: li a1, 127
329-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
330-
; CHECK-NEXT: vmv.s.x v8, a1
331-
; CHECK-NEXT: vmand.mm v0, v0, v8
328+
; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma
332329
; CHECK-NEXT: vle32.v v8, (a0), v0.t
333330
; CHECK-NEXT: ret
334331
%load = call <7 x float> @llvm.masked.load.v7f32(ptr %a, i32 8, <7 x i1> %mask, <7 x float> undef)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -334,10 +334,7 @@ define <256 x i8> @masked_load_v256i8(ptr %a, <256 x i1> %mask) {
334334
define <7 x i8> @masked_load_v7i8(ptr %a, <7 x i1> %mask) {
335335
; CHECK-LABEL: masked_load_v7i8:
336336
; CHECK: # %bb.0:
337-
; CHECK-NEXT: li a1, 127
338-
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
339-
; CHECK-NEXT: vmv.s.x v8, a1
340-
; CHECK-NEXT: vmand.mm v0, v0, v8
337+
; CHECK-NEXT: vsetivli zero, 7, e8, mf2, ta, ma
341338
; CHECK-NEXT: vle8.v v8, (a0), v0.t
342339
; CHECK-NEXT: ret
343340
%load = call <7 x i8> @llvm.masked.load.v7i8(ptr %a, i32 8, <7 x i1> %mask, <7 x i8> undef)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
4+
5+
define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
6+
; CHECK-LABEL: masked_load_nxv1i64:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
9+
; CHECK-NEXT: vle64.v v8, (a0), v0.t
10+
; CHECK-NEXT: ret
11+
%load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
12+
ret <vscale x 1 x i64> %load
13+
}
14+
declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
15+
16+
define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
17+
; CHECK-LABEL: masked_load_nxv2i64:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
20+
; CHECK-NEXT: vle64.v v8, (a0), v0.t
21+
; CHECK-NEXT: ret
22+
%load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
23+
ret <vscale x 2 x i64> %load
24+
}
25+
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
26+
27+
define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
28+
; CHECK-LABEL: masked_load_nxv4i64:
29+
; CHECK: # %bb.0:
30+
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
31+
; CHECK-NEXT: vle64.v v8, (a0), v0.t
32+
; CHECK-NEXT: ret
33+
%load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
34+
ret <vscale x 4 x i64> %load
35+
}
36+
declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
37+
38+
define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
39+
; CHECK-LABEL: masked_load_nxv8i64:
40+
; CHECK: # %bb.0:
41+
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
42+
; CHECK-NEXT: vle64.v v8, (a0), v0.t
43+
; CHECK-NEXT: ret
44+
%load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
45+
ret <vscale x 8 x i64> %load
46+
}
47+
declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)

llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll

Lines changed: 43 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,66 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
3-
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V
4+
; RUN: llc -mtriple=riscv32 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
5+
; RUN: llc -mtriple=riscv64 -mattr=+zve32x,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVE32
46

57
define <vscale x 1 x i8> @masked_load_nxv1i8(ptr %a, <vscale x 1 x i1> %mask) nounwind {
6-
; CHECK-LABEL: masked_load_nxv1i8:
7-
; CHECK: # %bb.0:
8-
; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
9-
; CHECK-NEXT: vle8.v v8, (a0), v0.t
10-
; CHECK-NEXT: ret
8+
; V-LABEL: masked_load_nxv1i8:
9+
; V: # %bb.0:
10+
; V-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
11+
; V-NEXT: vle8.v v8, (a0), v0.t
12+
; V-NEXT: ret
13+
;
14+
; ZVE32-LABEL: masked_load_nxv1i8:
15+
; ZVE32: # %bb.0:
16+
; ZVE32-NEXT: csrr a1, vlenb
17+
; ZVE32-NEXT: srli a1, a1, 3
18+
; ZVE32-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
19+
; ZVE32-NEXT: vle8.v v8, (a0), v0.t
20+
; ZVE32-NEXT: ret
1121
%load = call <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr %a, i32 1, <vscale x 1 x i1> %mask, <vscale x 1 x i8> undef)
1222
ret <vscale x 1 x i8> %load
1323
}
1424
declare <vscale x 1 x i8> @llvm.masked.load.nxv1i8(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
1525

1626
define <vscale x 1 x i16> @masked_load_nxv1i16(ptr %a, <vscale x 1 x i1> %mask) nounwind {
17-
; CHECK-LABEL: masked_load_nxv1i16:
18-
; CHECK: # %bb.0:
19-
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
20-
; CHECK-NEXT: vle16.v v8, (a0), v0.t
21-
; CHECK-NEXT: ret
27+
; V-LABEL: masked_load_nxv1i16:
28+
; V: # %bb.0:
29+
; V-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
30+
; V-NEXT: vle16.v v8, (a0), v0.t
31+
; V-NEXT: ret
32+
;
33+
; ZVE32-LABEL: masked_load_nxv1i16:
34+
; ZVE32: # %bb.0:
35+
; ZVE32-NEXT: csrr a1, vlenb
36+
; ZVE32-NEXT: srli a1, a1, 3
37+
; ZVE32-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
38+
; ZVE32-NEXT: vle16.v v8, (a0), v0.t
39+
; ZVE32-NEXT: ret
2240
%load = call <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr %a, i32 2, <vscale x 1 x i1> %mask, <vscale x 1 x i16> undef)
2341
ret <vscale x 1 x i16> %load
2442
}
2543
declare <vscale x 1 x i16> @llvm.masked.load.nxv1i16(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i16>)
2644

2745
define <vscale x 1 x i32> @masked_load_nxv1i32(ptr %a, <vscale x 1 x i1> %mask) nounwind {
28-
; CHECK-LABEL: masked_load_nxv1i32:
29-
; CHECK: # %bb.0:
30-
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
31-
; CHECK-NEXT: vle32.v v8, (a0), v0.t
32-
; CHECK-NEXT: ret
46+
; V-LABEL: masked_load_nxv1i32:
47+
; V: # %bb.0:
48+
; V-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
49+
; V-NEXT: vle32.v v8, (a0), v0.t
50+
; V-NEXT: ret
51+
;
52+
; ZVE32-LABEL: masked_load_nxv1i32:
53+
; ZVE32: # %bb.0:
54+
; ZVE32-NEXT: csrr a1, vlenb
55+
; ZVE32-NEXT: srli a1, a1, 3
56+
; ZVE32-NEXT: vsetvli zero, a1, e32, m1, ta, ma
57+
; ZVE32-NEXT: vle32.v v8, (a0), v0.t
58+
; ZVE32-NEXT: ret
3359
%load = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr %a, i32 4, <vscale x 1 x i1> %mask, <vscale x 1 x i32> undef)
3460
ret <vscale x 1 x i32> %load
3561
}
3662
declare <vscale x 1 x i32> @llvm.masked.load.nxv1i32(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i32>)
3763

38-
define <vscale x 1 x i64> @masked_load_nxv1i64(ptr %a, <vscale x 1 x i1> %mask) nounwind {
39-
; CHECK-LABEL: masked_load_nxv1i64:
40-
; CHECK: # %bb.0:
41-
; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
42-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
43-
; CHECK-NEXT: ret
44-
%load = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr %a, i32 8, <vscale x 1 x i1> %mask, <vscale x 1 x i64> undef)
45-
ret <vscale x 1 x i64> %load
46-
}
47-
declare <vscale x 1 x i64> @llvm.masked.load.nxv1i64(ptr, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
48-
4964
define <vscale x 2 x i8> @masked_load_nxv2i8(ptr %a, <vscale x 2 x i1> %mask) nounwind {
5065
; CHECK-LABEL: masked_load_nxv2i8:
5166
; CHECK: # %bb.0:
@@ -79,17 +94,6 @@ define <vscale x 2 x i32> @masked_load_nxv2i32(ptr %a, <vscale x 2 x i1> %mask)
7994
}
8095
declare <vscale x 2 x i32> @llvm.masked.load.nxv2i32(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
8196

82-
define <vscale x 2 x i64> @masked_load_nxv2i64(ptr %a, <vscale x 2 x i1> %mask) nounwind {
83-
; CHECK-LABEL: masked_load_nxv2i64:
84-
; CHECK: # %bb.0:
85-
; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
86-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
87-
; CHECK-NEXT: ret
88-
%load = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr %a, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
89-
ret <vscale x 2 x i64> %load
90-
}
91-
declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(ptr, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
92-
9397
define <vscale x 4 x i8> @masked_load_nxv4i8(ptr %a, <vscale x 4 x i1> %mask) nounwind {
9498
; CHECK-LABEL: masked_load_nxv4i8:
9599
; CHECK: # %bb.0:
@@ -123,17 +127,6 @@ define <vscale x 4 x i32> @masked_load_nxv4i32(ptr %a, <vscale x 4 x i1> %mask)
123127
}
124128
declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
125129

126-
define <vscale x 4 x i64> @masked_load_nxv4i64(ptr %a, <vscale x 4 x i1> %mask) nounwind {
127-
; CHECK-LABEL: masked_load_nxv4i64:
128-
; CHECK: # %bb.0:
129-
; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
130-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
131-
; CHECK-NEXT: ret
132-
%load = call <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr %a, i32 8, <vscale x 4 x i1> %mask, <vscale x 4 x i64> undef)
133-
ret <vscale x 4 x i64> %load
134-
}
135-
declare <vscale x 4 x i64> @llvm.masked.load.nxv4i64(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i64>)
136-
137130
define <vscale x 8 x i8> @masked_load_nxv8i8(ptr %a, <vscale x 8 x i1> %mask) nounwind {
138131
; CHECK-LABEL: masked_load_nxv8i8:
139132
; CHECK: # %bb.0:
@@ -167,17 +160,6 @@ define <vscale x 8 x i32> @masked_load_nxv8i32(ptr %a, <vscale x 8 x i1> %mask)
167160
}
168161
declare <vscale x 8 x i32> @llvm.masked.load.nxv8i32(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i32>)
169162

170-
define <vscale x 8 x i64> @masked_load_nxv8i64(ptr %a, <vscale x 8 x i1> %mask) nounwind {
171-
; CHECK-LABEL: masked_load_nxv8i64:
172-
; CHECK: # %bb.0:
173-
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
174-
; CHECK-NEXT: vle64.v v8, (a0), v0.t
175-
; CHECK-NEXT: ret
176-
%load = call <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr %a, i32 8, <vscale x 8 x i1> %mask, <vscale x 8 x i64> undef)
177-
ret <vscale x 8 x i64> %load
178-
}
179-
declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64(ptr, i32, <vscale x 8 x i1>, <vscale x 8 x i64>)
180-
181163
define <vscale x 16 x i8> @masked_load_nxv16i8(ptr %a, <vscale x 16 x i1> %mask) nounwind {
182164
; CHECK-LABEL: masked_load_nxv16i8:
183165
; CHECK: # %bb.0:

0 commit comments

Comments
 (0)