Skip to content

Commit 5623b19

Browse files
committed
[SelectionDAG] Support integer promotion for VP_LOAD and VP_STORE
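Add integer promotion support for VP_LOAD and VP_STORE. A VP_LOAD whose integer result needs promotion is legalized as an extending VP load of the promoted type, and a VP_STORE whose stored value needs promotion is legalized as a truncating VP store. The mask and EVL operands of both nodes are promoted in place. Patch commandeered from: https://reviews.llvm.org/D109377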
1 parent 10a1ea9 commit 5623b19

File tree

5 files changed

+127
-18
lines changed

5 files changed

+127
-18
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
8383
case ISD::EXTRACT_VECTOR_ELT:
8484
Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
8585
case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
86+
case ISD::VP_LOAD:
87+
Res = PromoteIntRes_VP_LOAD(cast<VPLoadSDNode>(N));
88+
break;
8689
case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));
8790
break;
8891
case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N));
@@ -957,6 +960,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
957960
return Res;
958961
}
959962

963+
/// Promote the integer result of a VP_LOAD by rebuilding it as a VP load that
/// produces the promoted type while keeping the original memory type.
///
/// \param N the (unindexed) VP load whose result 0 needs promotion.
/// \returns the new load; its chain result has already replaced the old chain.
SDValue DAGTypeLegalizer::PromoteIntRes_VP_LOAD(VPLoadSDNode *N) {
  assert(!N->isIndexed() && "Indexed vp_load during type legalization!");

  SDLoc DL(N);
  EVT PromotedVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));

  // A non-extending load is turned into an EXTLOAD; any other extension kind
  // already carried by the node is kept as is.
  ISD::LoadExtType LoadExt = N->getExtensionType();
  if (LoadExt == ISD::NON_EXTLOAD)
    LoadExt = ISD::EXTLOAD;

  SDValue NewLoad = DAG.getLoadVP(
      N->getAddressingMode(), LoadExt, PromotedVT, DL, N->getChain(),
      N->getBasePtr(), N->getOffset(), N->getMask(), N->getVectorLength(),
      N->getMemoryVT(), N->getMemOperand());

  // Result 1 is the chain: redirect every user of the old chain to the chain
  // produced by the new load.
  ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
  return NewLoad;
}
979+
960980
SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
961981
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
962982
SDValue ExtPassThru = GetPromotedInteger(N->getPassThru());
@@ -1957,8 +1977,14 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
19571977
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
19581978
case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
19591979
OpNo); break;
1980+
case ISD::VP_STORE:
1981+
Res = PromoteIntOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
1982+
break;
19601983
case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N),
19611984
OpNo); break;
1985+
case ISD::VP_LOAD:
1986+
Res = PromoteIntOp_VP_LOAD(cast<VPLoadSDNode>(N), OpNo);
1987+
break;
19621988
case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N),
19631989
OpNo); break;
19641990
case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N),
@@ -2378,6 +2404,50 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
23782404
N->getMemoryVT(), N->getMemOperand());
23792405
}
23802406

2407+
/// Promote an integer operand of a VP_STORE.
///
/// \param N    the VP store being legalized.
/// \param OpNo index of the operand to promote. Operand 1 is the stored
///             value; operand 4 is handled as the mask and any higher operand
///             as the explicit vector length (EVL).
/// \returns the node with the mask/EVL updated in place, or a new truncating
///          VP store when the stored value itself was promoted.
SDValue DAGTypeLegalizer::PromoteIntOp_VP_STORE(VPStoreSDNode *N,
                                                unsigned OpNo) {
  if (OpNo < 4) {
    // Only the stored value can be promoted here; emit a truncating store of
    // the promoted value back to the original memory type.
    assert(OpNo == 1 && "Unexpected operand for promotion");
    SDValue PromotedData = GetPromotedInteger(N->getValue());

    assert(!N->isIndexed() && "expecting unindexed vp_store!");

    return DAG.getTruncStoreVP(N->getChain(), SDLoc(N), PromotedData,
                               N->getBasePtr(), N->getMask(),
                               N->getVectorLength(), N->getMemoryVT(),
                               N->getMemOperand(), N->isCompressingStore());
  }

  // The Mask or EVL. Update in place.
  SDValue OldOp = N->getOperand(OpNo);
  SDValue NewOp;
  if (OpNo == 4)
    NewOp = PromoteTargetBoolean(OldOp, N->getValue().getValueType());
  else
    NewOp = ZExtPromotedInteger(OldOp);

  SmallVector<SDValue, 6> Ops(N->op_begin(), N->op_end());
  Ops[OpNo] = NewOp;
  return SDValue(DAG.UpdateNodeOperands(N, Ops), 0);
}
2432+
2433+
/// Promote the mask or explicit vector length operand of a VP_LOAD.
///
/// \param N    the VP load being legalized.
/// \param OpNo index of the operand to promote; must be at least 3. Operand 3
///             is handled as the mask, any higher operand as the length.
/// \returns result 0 of \p N when it was updated in place, or an empty SDValue
///          when the update produced a different node and both results of
///          \p N have already been replaced here.
SDValue DAGTypeLegalizer::PromoteIntOp_VP_LOAD(VPLoadSDNode *N, unsigned OpNo) {
  assert(OpNo >= 3 && "Only know how to promote the mask or length!");

  SDValue OldOp = N->getOperand(OpNo);
  SDValue NewOp;
  if (OpNo == 3)
    NewOp = PromoteTargetBoolean(OldOp, N->getValueType(0));
  else
    NewOp = ZExtPromotedInteger(OldOp);

  SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
  Ops[OpNo] = NewOp;
  SDNode *Updated = DAG.UpdateNodeOperands(N, Ops);

  if (Updated != N) {
    // Update triggered CSE, do our own replacement since caller can't:
    // the load has two results (value and chain) and both must be rewired.
    ReplaceValueWith(SDValue(N, 0), SDValue(Updated, 0));
    ReplaceValueWith(SDValue(N, 1), SDValue(Updated, 1));
    return SDValue();
  }

  return SDValue(N, 0);
}
2450+
23812451
SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
23822452
unsigned OpNo) {
23832453
SDValue DataOp = N->getValue();

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
338338
SDValue PromoteIntRes_FREEZE(SDNode *N);
339339
SDValue PromoteIntRes_INT_EXTEND(SDNode *N);
340340
SDValue PromoteIntRes_LOAD(LoadSDNode *N);
341+
SDValue PromoteIntRes_VP_LOAD(VPLoadSDNode *N);
341342
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
342343
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
343344
SDValue PromoteIntRes_VECTOR_COMPRESS(SDNode *N);
@@ -420,6 +421,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
420421
SDValue PromoteIntOp_ExpOp(SDNode *N);
421422
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
422423
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
424+
SDValue PromoteIntOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
425+
SDValue PromoteIntOp_VP_LOAD(VPLoadSDNode *N, unsigned OpNo);
423426
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
424427
SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo);
425428
SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,18 @@ define void @vpstore_v4i8(<4 x i8> %val, ptr %ptr, <4 x i1> %m, i32 zeroext %evl
2828
ret void
2929
}
3030

31+
; vp.store of a vector with a non-power-of-two element width (i7). The CHECK
; lines expect it to be emitted as a masked e8 store (vse8.v).
declare void @llvm.vp.store.v8i7.p0(<8 x i7>, ptr, <8 x i1>, i32)

define void @vpstore_v8i7(<8 x i7> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_v8i7:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vse8.v v8, (a0), v0.t
; CHECK-NEXT: ret
  call void @llvm.vp.store.v8i7.p0(<8 x i7> %val, ptr %ptr, <8 x i1> %m, i32 %evl)
  ret void
}
42+
3143
declare void @llvm.vp.store.v8i8.p0(<8 x i8>, ptr, <8 x i1>, i32)
3244

3345
define void @vpstore_v8i8(<8 x i8> %val, ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
@@ -285,10 +297,10 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero
285297
; CHECK: # %bb.0:
286298
; CHECK-NEXT: li a3, 16
287299
; CHECK-NEXT: mv a2, a1
288-
; CHECK-NEXT: bltu a1, a3, .LBB23_2
300+
; CHECK-NEXT: bltu a1, a3, .LBB24_2
289301
; CHECK-NEXT: # %bb.1:
290302
; CHECK-NEXT: li a2, 16
291-
; CHECK-NEXT: .LBB23_2:
303+
; CHECK-NEXT: .LBB24_2:
292304
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
293305
; CHECK-NEXT: vse64.v v8, (a0), v0.t
294306
; CHECK-NEXT: addi a2, a1, -16

llvm/test/CodeGen/RISCV/rvv/vpload.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,18 @@ define <vscale x 3 x i8> @vpload_nxv3i8(ptr %ptr, <vscale x 3 x i1> %m, i32 zero
6565
ret <vscale x 3 x i8> %load
6666
}
6767

68+
; vp.load of a scalable vector with a non-power-of-two element width (i6).
; The CHECK lines expect it to be emitted as a masked e8 load (vle8.v).
declare <vscale x 4 x i6> @llvm.vp.load.nxv4i6.p0(ptr, <vscale x 4 x i1>, i32)

define <vscale x 4 x i6> @vpload_nxv4i6(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpload_nxv4i6:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0), v0.t
; CHECK-NEXT: ret
  %load = call <vscale x 4 x i6> @llvm.vp.load.nxv4i6.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
  ret <vscale x 4 x i6> %load
}
79+
6880
declare <vscale x 4 x i8> @llvm.vp.load.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
6981

7082
define <vscale x 4 x i8> @vpload_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -523,10 +535,10 @@ define <vscale x 16 x double> @vpload_nxv16f64(ptr %ptr, <vscale x 16 x i1> %m,
523535
; CHECK-NEXT: add a4, a0, a4
524536
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
525537
; CHECK-NEXT: vle64.v v16, (a4), v0.t
526-
; CHECK-NEXT: bltu a1, a2, .LBB43_2
538+
; CHECK-NEXT: bltu a1, a2, .LBB44_2
527539
; CHECK-NEXT: # %bb.1:
528540
; CHECK-NEXT: mv a1, a2
529-
; CHECK-NEXT: .LBB43_2:
541+
; CHECK-NEXT: .LBB44_2:
530542
; CHECK-NEXT: vmv1r.v v0, v8
531543
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
532544
; CHECK-NEXT: vle64.v v8, (a0), v0.t
@@ -553,10 +565,10 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
553565
; CHECK-NEXT: slli a5, a3, 1
554566
; CHECK-NEXT: vmv1r.v v8, v0
555567
; CHECK-NEXT: mv a4, a2
556-
; CHECK-NEXT: bltu a2, a5, .LBB44_2
568+
; CHECK-NEXT: bltu a2, a5, .LBB45_2
557569
; CHECK-NEXT: # %bb.1:
558570
; CHECK-NEXT: mv a4, a5
559-
; CHECK-NEXT: .LBB44_2:
571+
; CHECK-NEXT: .LBB45_2:
560572
; CHECK-NEXT: sub a6, a4, a3
561573
; CHECK-NEXT: sltu a7, a4, a6
562574
; CHECK-NEXT: addi a7, a7, -1
@@ -572,21 +584,21 @@ define <vscale x 16 x double> @vpload_nxv17f64(ptr %ptr, ptr %out, <vscale x 17
572584
; CHECK-NEXT: sltu a2, a2, a5
573585
; CHECK-NEXT: addi a2, a2, -1
574586
; CHECK-NEXT: and a2, a2, a5
575-
; CHECK-NEXT: bltu a2, a3, .LBB44_4
587+
; CHECK-NEXT: bltu a2, a3, .LBB45_4
576588
; CHECK-NEXT: # %bb.3:
577589
; CHECK-NEXT: mv a2, a3
578-
; CHECK-NEXT: .LBB44_4:
590+
; CHECK-NEXT: .LBB45_4:
579591
; CHECK-NEXT: slli a5, a3, 4
580592
; CHECK-NEXT: srli a6, a3, 2
581593
; CHECK-NEXT: vsetvli a7, zero, e8, mf2, ta, ma
582594
; CHECK-NEXT: vslidedown.vx v0, v8, a6
583595
; CHECK-NEXT: add a5, a0, a5
584596
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
585597
; CHECK-NEXT: vle64.v v24, (a5), v0.t
586-
; CHECK-NEXT: bltu a4, a3, .LBB44_6
598+
; CHECK-NEXT: bltu a4, a3, .LBB45_6
587599
; CHECK-NEXT: # %bb.5:
588600
; CHECK-NEXT: mv a4, a3
589-
; CHECK-NEXT: .LBB44_6:
601+
; CHECK-NEXT: .LBB45_6:
590602
; CHECK-NEXT: vmv1r.v v0, v8
591603
; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma
592604
; CHECK-NEXT: vle64.v v8, (a0), v0.t

llvm/test/CodeGen/RISCV/rvv/vpstore.ll

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,18 @@ define void @vpstore_nxv4i16(<vscale x 4 x i16> %val, ptr %ptr, <vscale x 4 x i1
104104
ret void
105105
}
106106

107+
; vp.store of a scalable vector with a non-power-of-two element width (i12).
; The CHECK lines expect it to be emitted as a masked e16 store (vse16.v).
declare void @llvm.vp.store.nxv8i12.p0(<vscale x 8 x i12>, ptr, <vscale x 8 x i1>, i32)

define void @vpstore_nxv8i12(<vscale x 8 x i12> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpstore_nxv8i12:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vse16.v v8, (a0), v0.t
; CHECK-NEXT: ret
  call void @llvm.vp.store.nxv8i12.p0(<vscale x 8 x i12> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
  ret void
}
118+
107119
declare void @llvm.vp.store.nxv8i16.p0(<vscale x 8 x i16>, ptr, <vscale x 8 x i1>, i32)
108120

109121
define void @vpstore_nxv8i16(<vscale x 8 x i16> %val, ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
@@ -421,10 +433,10 @@ define void @vpstore_nxv16f64(<vscale x 16 x double> %val, ptr %ptr, <vscale x 1
421433
; CHECK: # %bb.0:
422434
; CHECK-NEXT: csrr a2, vlenb
423435
; CHECK-NEXT: mv a3, a1
424-
; CHECK-NEXT: bltu a1, a2, .LBB34_2
436+
; CHECK-NEXT: bltu a1, a2, .LBB35_2
425437
; CHECK-NEXT: # %bb.1:
426438
; CHECK-NEXT: mv a3, a2
427-
; CHECK-NEXT: .LBB34_2:
439+
; CHECK-NEXT: .LBB35_2:
428440
; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma
429441
; CHECK-NEXT: vse64.v v8, (a0), v0.t
430442
; CHECK-NEXT: sub a3, a1, a2
@@ -454,15 +466,15 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
454466
; CHECK-NEXT: slli a4, a3, 1
455467
; CHECK-NEXT: vmv1r.v v24, v0
456468
; CHECK-NEXT: mv a5, a2
457-
; CHECK-NEXT: bltu a2, a4, .LBB35_2
469+
; CHECK-NEXT: bltu a2, a4, .LBB36_2
458470
; CHECK-NEXT: # %bb.1:
459471
; CHECK-NEXT: mv a5, a4
460-
; CHECK-NEXT: .LBB35_2:
472+
; CHECK-NEXT: .LBB36_2:
461473
; CHECK-NEXT: mv a6, a5
462-
; CHECK-NEXT: bltu a5, a3, .LBB35_4
474+
; CHECK-NEXT: bltu a5, a3, .LBB36_4
463475
; CHECK-NEXT: # %bb.3:
464476
; CHECK-NEXT: mv a6, a3
465-
; CHECK-NEXT: .LBB35_4:
477+
; CHECK-NEXT: .LBB36_4:
466478
; CHECK-NEXT: addi sp, sp, -16
467479
; CHECK-NEXT: .cfi_def_cfa_offset 16
468480
; CHECK-NEXT: csrr a7, vlenb
@@ -490,10 +502,10 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, ptr %ptr, <vscale x 1
490502
; CHECK-NEXT: and a0, a2, a0
491503
; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma
492504
; CHECK-NEXT: vse64.v v16, (a6), v0.t
493-
; CHECK-NEXT: bltu a0, a3, .LBB35_6
505+
; CHECK-NEXT: bltu a0, a3, .LBB36_6
494506
; CHECK-NEXT: # %bb.5:
495507
; CHECK-NEXT: mv a0, a3
496-
; CHECK-NEXT: .LBB35_6:
508+
; CHECK-NEXT: .LBB36_6:
497509
; CHECK-NEXT: slli a2, a3, 4
498510
; CHECK-NEXT: srli a3, a3, 2
499511
; CHECK-NEXT: vsetvli a4, zero, e8, mf2, ta, ma

0 commit comments

Comments
 (0)