Skip to content

Commit 8191cea

Browse files
committed
[SelectionDAG] Support integer promotion for VP_LOAD and VP_STORE
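Add integer promotion support for VP_LOAD and VP_STORE: a VP_LOAD whose result needs promotion is legalized as an extending VP load, and a VP_STORE whose stored value needs promotion is legalized as a truncating VP store. Patch commandeered from: https://reviews.llvm.org/D109377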
1 parent 536e0eb commit 8191cea

File tree

5 files changed

+127
-18
lines changed

5 files changed

+127
-18
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
7979
case ISD::EXTRACT_VECTOR_ELT:
8080
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
8181
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
82+
case ISD::VP_LOAD:
83+
Res = PromoteIntRes_VP_LOAD(cast<VPLoadSDNode>(N));
84+
break;
8285
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
8386
break;
8487
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
@@ -875,6 +878,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
875878
return Res;
876879
}
877880

881+
SDValue DAGTypeLegalizer::PromoteIntRes_VP_LOAD(VPLoadSDNode *N) {
882+
assert(!N->isIndexed() && "Indexed vp_load during type legalization!");
883+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
884+
ISD::LoadExtType ExtType = (N->getExtensionType() == ISD::NON_EXTLOAD)
885+
? ISD::EXTLOAD
886+
: N->getExtensionType();
887+
SDLoc dl(N);
888+
SDValue Res =
889+
DAG.getLoadVP(N->getAddressingMode(), ExtType, NVT, dl, N->getChain(),
890+
N->getBasePtr(), N->getOffset(), N->getMask(),
891+
N->getVectorLength(), N->getMemoryVT(), N->getMemOperand());
892+
// Legalize the chain result - switch anything that used the old chain to
893+
// use the new one.
894+
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
895+
return Res;
896+
}
897+
878898
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
879899
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
880900
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
@@ -1817,8 +1837,14 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
18171837
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
18181838
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
18191839
OpNo); break;
1840+
case ISD::VP_STORE:
1841+
Res = PromoteIntOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
1842+
break;
18201843
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
18211844
OpNo); break;
1845+
case ISD::VP_LOAD:
1846+
Res = PromoteIntOp_VP_LOAD(cast<VPLoadSDNode>(N), OpNo);
1847+
break;
18221848
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
18231849
OpNo); break;
18241850
case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
@@ -2216,6 +2242,50 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
22162242
N->getMemoryVT(), N->getMemOperand());
22172243
}
22182244

2245+
/// Promote integer operand \p OpNo of a VP_STORE node.
///
/// Two cases are handled:
///  - OpNo >= 4: the operand is promoted and the node's operand list is
///    updated in place. Operand 4 goes through PromoteTargetBoolean (using
///    the stored value's type); any higher index goes through
///    ZExtPromotedInteger. NOTE(review): per the inline comment these are the
///    mask and EVL respectively - confirm against the VP_STORE operand layout.
///  - OpNo == 1: the stored value is replaced by its promoted form and a
///    truncating VP store with the node's original memory VT is emitted.
///
/// \param N the VP store being legalized.
/// \param OpNo index of the operand that needs promotion; any index other
///        than 1 or >= 4 trips the assertion below.
/// \returns the updated node's value 0, or the new truncating VP store.
SDValue DAGTypeLegalizer::PromoteIntOp_VP_STORE(VPStoreSDNode *N,
2246+
unsigned OpNo) {
2247+
SDValue DataOp = N->getValue();
2248+
SDValue Operand = N->getOperand(OpNo);
2249+
2250+
if (OpNo >= 4) {
2251+
// The Mask or EVL. Update in place.
2252+
EVT DataVT = DataOp.getValueType();
2253+
SDValue PromotedOperand = OpNo == 4 ? PromoteTargetBoolean(Operand, DataVT)
2254+
: ZExtPromotedInteger(Operand);
2255+
// Copy every operand, then swap in only the promoted one.
SmallVector<SDValue, 6> NewOps(N->op_begin(), N->op_end());
2256+
NewOps[OpNo] = PromotedOperand;
2257+
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
2258+
}
2259+
2260+
assert(OpNo == 1 && "Unexpected operand for promotion");
2261+
DataOp = GetPromotedInteger(DataOp);
2262+
2263+
assert(!N->isIndexed() && "expecting unindexed vp_store!");
2264+
2265+
// The promoted value is wider than the memory VT, so emit a truncating
// store; chain, pointer, mask, vector length and memory operand are reused.
return DAG.getTruncStoreVP(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(),
2266+
N->getMask(), N->getVectorLength(),
2267+
N->getMemoryVT(), N->getMemOperand(),
2268+
N->isCompressingStore());
2269+
}
2270+
2271+
/// Promote integer operand \p OpNo of a VP_LOAD node by updating the node's
/// operand list.
///
/// Operand 3 is promoted with PromoteTargetBoolean (using the load's result
/// type); any higher index is promoted with ZExtPromotedInteger.
/// NOTE(review): per the assertion message these are the mask and the vector
/// length respectively - confirm against the VP_LOAD operand layout.
///
/// \param N the VP load being legalized.
/// \param OpNo index of the operand to promote; asserted to be >= 3.
/// \returns value 0 of \p N when the update kept the same node; otherwise an
///          empty SDValue after both results of \p N have been replaced here.
SDValue DAGTypeLegalizer::PromoteIntOp_VP_LOAD(VPLoadSDNode *N, unsigned OpNo) {
2272+
assert(OpNo >= 3 && "Only know how to promote the mask or length!");
2273+
EVT DataVT = N->getValueType(0);
2274+
SDValue Operand = N->getOperand(OpNo);
2275+
SDValue PromotedOperand = OpNo == 3 ? PromoteTargetBoolean(Operand, DataVT)
2276+
: ZExtPromotedInteger(Operand);
2277+
// Copy every operand, then swap in only the promoted one.
SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end());
2278+
NewOps[OpNo] = PromotedOperand;
2279+
SDNode *Res = DAG.UpdateNodeOperands(N, NewOps);
2280+
// Same node back: the operands were updated in place.
if (Res == N)
2281+
return SDValue(Res, 0);
2282+
2283+
// Update triggered CSE, do our own replacement since caller can't.
2284+
// Both results of the old node (values 0 and 1) are redirected to Res.
ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0));
2285+
ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1));
2286+
return SDValue();
2287+
}
2288+
22192289
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
22202290
unsigned OpNo) {
22212291
SDValue DataOp = N->getValue();

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
318318
SDValue PromoteIntRes_FREEZE(SDNode *N);
319319
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
320320
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
321+
SDValue PromoteIntRes_VP_LOAD(VPLoadSDNode *N);
321322
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
322323
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
323324
SDValue PromoteIntRes_Overflow(SDNode *N);
@@ -395,6 +396,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
395396
SDValue PromoteIntOp_ExpOp(SDNode *N);
396397
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
397398
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
399+
SDValue PromoteIntOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
400+
SDValue PromoteIntOp_VP_LOAD(VPLoadSDNode *N, unsigned OpNo);
398401
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
399402
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
400403
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,18 @@ define void @vpstore_v4i8(<4 x i8> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl
2828
ret void
2929
}
3030

31+
declare void @llvm.vp.store.v8i7.v8i7.p0(<8 x i7>, <8 x i7>*, <8 x i1>, i32)
32+
33+
define void @vpstore_v8i7(<8 x i7> %val, <8 x i7>* %ptr, <8 x i1> %m, i32 zeroext %evl) {
34+
; CHECK-LABEL: vpstore_v8i7:
35+
; CHECK: # %bb.0:
36+
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
37+
; CHECK-NEXT: vse8.v v8, (a0), v0.t
38+
; CHECK-NEXT: ret
39+
call void @llvm.vp.store.v8i7.v8i7.p0(<8 x i7> %val, <8 x i7>* %ptr, <8 x i1> %m, i32 %evl)
40+
ret void
41+
}
42+
3143
declare void @llvm.vp.store.v8i8.p0(<8 x i8>, ptr, <8 x i1>, i32)
3244

3345
define void @vpstore_v8i8(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
@@ -287,10 +299,10 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero
287299
; CHECK: # %bb.0:
288300
; CHECK-NEXT: li a3, 16
289301
; CHECK-NEXT: mv a2, a1
290-
; CHECK-NEXT: bltu a1, a3, .LBB23_2
302+
; CHECK-NEXT: bltu a1, a3, .LBB24_2
291303
; CHECK-NEXT: # %bb.1:
292304
; CHECK-NEXT: li a2, 16
293-
; CHECK-NEXT: .LBB23_2:
305+
; CHECK-NEXT: .LBB24_2:
294306
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
295307
; CHECK-NEXT: vse64.v v8, (a0), v0.t
296308
; CHECK-NEXT: addi a2, a1, -16

llvm/test/CodeGen/RISCV/rvv/vpload.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,18 @@ define <vscale x 3 x i8> @vpload_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zero
5252
ret <vscale x 3 x i8> %load
5353
}
5454

55+
declare <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>*, <vscale x 4 x i1>, i32)
56+
57+
define <vscale x 4 x i6> @vpload_nxv4i6(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
58+
; CHECK-LABEL: vpload_nxv4i6:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
61+
; CHECK-NEXT: vle8.v v8, (a0), v0.t
62+
; CHECK-NEXT: ret
63+
%load = call <vscale x 4 x i6> @llvm.vp.load.nxv4i6.nxv4i6.p0(<vscale x 4 x i6>* %ptr, <vscale x 4 x i1> %m, i32 %evl)
64+
ret <vscale x 4 x i6> %load
65+
}
66+
5567
declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
5668

5769
define <vscale x 4 x i8> @vpload_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -466,10 +478,10 @@ define <vscale x 16 x double> @vpload_nxv16f64(ptr %ptr, <vscale x 16 x i1> %m,
466478
; CHECK-NEXT: vslidedown.vx v0, v0, a5
467479
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
468480
; CHECK-NEXT: vle64.v v16, (a4), v0.t
469-
; CHECK-NEXT: bltu a1, a2, .LBB37_2
481+
; CHECK-NEXT: bltu a1, a2, .LBB38_2
470482
; CHECK-NEXT: # %bb.1:
471483
; CHECK-NEXT: mv a1, a2
472-
; CHECK-NEXT: .LBB37_2:
484+
; CHECK-NEXT: .LBB38_2:
473485
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
474486
; CHECK-NEXT: vmv1r.v v0, v8
475487
; CHECK-NEXT: vle64.v v8, (a0), v0.t
@@ -496,10 +508,10 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
496508
; CHECK-NEXT: slli a5, a3, 1
497509
; CHECK-NEXT: vmv1r.v v8, v0
498510
; CHECK-NEXT: mv a4, a2
499-
; CHECK-NEXT: bltu a2, a5, .LBB38_2
511+
; CHECK-NEXT: bltu a2, a5, .LBB39_2
500512
; CHECK-NEXT: # %bb.1:
501513
; CHECK-NEXT: mv a4, a5
502-
; CHECK-NEXT: .LBB38_2:
514+
; CHECK-NEXT: .LBB39_2:
503515
; CHECK-NEXT: sub a6, a4, a3
504516
; CHECK-NEXT: sltu a7, a4, a6
505517
; CHECK-NEXT: addi a7, a7, -1
@@ -515,21 +527,21 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
515527
; CHECK-NEXT: sltu a2, a2, a5
516528
; CHECK-NEXT: addi a2, a2, -1
517529
; CHECK-NEXT: and a2, a2, a5
518-
; CHECK-NEXT: bltu a2, a3, .LBB38_4
530+
; CHECK-NEXT: bltu a2, a3, .LBB39_4
519531
; CHECK-NEXT: # %bb.3:
520532
; CHECK-NEXT: mv a2, a3
521-
; CHECK-NEXT: .LBB38_4:
533+
; CHECK-NEXT: .LBB39_4:
522534
; CHECK-NEXT: slli a5, a3, 4
523535
; CHECK-NEXT: add a5, a0, a5
524536
; CHECK-NEXT: srli a6, a3, 2
525537
; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
526538
; CHECK-NEXT: vslidedown.vx v0, v8, a6
527539
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
528540
; CHECK-NEXT: vle64.v v24, (a5), v0.t
529-
; CHECK-NEXT: bltu a4, a3, .LBB38_6
541+
; CHECK-NEXT: bltu a4, a3, .LBB39_6
530542
; CHECK-NEXT: # %bb.5:
531543
; CHECK-NEXT: mv a4, a3
532-
; CHECK-NEXT: .LBB38_6:
544+
; CHECK-NEXT: .LBB39_6:
533545
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
534546
; CHECK-NEXT: vmv1r.v v0, v8
535547
; CHECK-NEXT: vle64.v v8, (a0), v0.t

llvm/test/CodeGen/RISCV/rvv/vpstore.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,18 @@ define void @vpstore_nxv4i16(<vscale x 4 x i16> %val, ptr %ptr, <vscale x 4 x i1
100100
ret void
101101
}
102102

103+
declare void @llvm.vp.store.nxv8i12.nxv8i12.p0(<vscale x 8 x i12>, <vscale x 8 x i12>*, <vscale x 8 x i1>, i32)
104+
105+
define void @vpstore_nxv8i12(<vscale x 8 x i12> %val, <vscale x 8 x i12>* %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
106+
; CHECK-LABEL: vpstore_nxv8i12:
107+
; CHECK: # %bb.0:
108+
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
109+
; CHECK-NEXT: vse16.v v8, (a0), v0.t
110+
; CHECK-NEXT: ret
111+
call void @llvm.vp.store.nxv8i12.nxv8i12.p0(<vscale x 8 x i12> %val, <vscale x 8 x i12>* %ptr, <vscale x 8 x i1> %m, i32 %evl)
112+
ret void
113+
}
114+
103115
declare void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, <vscale x 8 x i1>, i32)
104116

105117
define void @vpstore_nxv8i16(<vscale x 8 x i16> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -371,10 +383,10 @@ define void @vpstore_nxv16f64(<vscale x 16 x double> %val, ptr %ptr, <vscale x 1
371383
; CHECK: # %bb.0:
372384
; CHECK-NEXT: csrr a2, vlenb
373385
; CHECK-NEXT: mv a3, a1
374-
; CHECK-NEXT: bltu a1, a2, .LBB30_2
386+
; CHECK-NEXT: bltu a1, a2, .LBB31_2
375387
; CHECK-NEXT: # %bb.1:
376388
; CHECK-NEXT: mv a3, a2
377-
; CHECK-NEXT: .LBB30_2:
389+
; CHECK-NEXT: .LBB31_2:
378390
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
379391
; CHECK-NEXT: vse64.v v8, (a0), v0.t
380392
; CHECK-NEXT: sub a3, a1, a2
@@ -404,15 +416,15 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
404416
; CHECK-NEXT: slli a4, a3, 1
405417
; CHECK-NEXT: vmv1r.v v24, v0
406418
; CHECK-NEXT: mv a5, a2
407-
; CHECK-NEXT: bltu a2, a4, .LBB31_2
419+
; CHECK-NEXT: bltu a2, a4, .LBB32_2
408420
; CHECK-NEXT: # %bb.1:
409421
; CHECK-NEXT: mv a5, a4
410-
; CHECK-NEXT: .LBB31_2:
422+
; CHECK-NEXT: .LBB32_2:
411423
; CHECK-NEXT: mv a6, a5
412-
; CHECK-NEXT: bltu a5, a3, .LBB31_4
424+
; CHECK-NEXT: bltu a5, a3, .LBB32_4
413425
; CHECK-NEXT: # %bb.3:
414426
; CHECK-NEXT: mv a6, a3
415-
; CHECK-NEXT: .LBB31_4:
427+
; CHECK-NEXT: .LBB32_4:
416428
; CHECK-NEXT: addi sp, sp, -16
417429
; CHECK-NEXT: .cfi_def_cfa_offset 16
418430
; CHECK-NEXT: csrr a7, vlenb
@@ -440,10 +452,10 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
440452
; CHECK-NEXT: addi a2, a2, -1
441453
; CHECK-NEXT: and a0, a2, a0
442454
; CHECK-NEXT: vse64.v v16, (a5), v0.t
443-
; CHECK-NEXT: bltu a0, a3, .LBB31_6
455+
; CHECK-NEXT: bltu a0, a3, .LBB32_6
444456
; CHECK-NEXT: # %bb.5:
445457
; CHECK-NEXT: mv a0, a3
446-
; CHECK-NEXT: .LBB31_6:
458+
; CHECK-NEXT: .LBB32_6:
447459
; CHECK-NEXT: slli a2, a3, 4
448460
; CHECK-NEXT: add a1, a1, a2
449461
; CHECK-NEXT: srli a3, a3, 2

0 commit comments

Comments
 (0)