
Commit 74940d2

Lorenzo Albano authored and simoll committed
[VP] Add widening for VP_STRIDED_LOAD and VP_STRIDED_STORE
Reviewed By: frasercrmck, craig.topper
Differential Revision: https://reviews.llvm.org/D121114
1 parent ff1d471 commit 74940d2
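
As a rough illustration of what this change enables (an LLVM IR sketch only; the function name @widen_load_sketch is made up here, while the intrinsic, types, and attributes mirror the tests added below): a strided VP load whose result type is not legal for the target, such as <3 x double> on RVV, can now be type-legalized by widening the result and mask to the next legal vector type, while the EVL operand is carried over unchanged.

; Illustrative sketch (assumed function name): the <3 x double> result and the
; <3 x i1> mask are widened to 4 elements during type legalization; %evl is
; passed through, so the widened load still accesses at most %evl elements.
define <3 x double> @widen_load_sketch(double* %ptr, i32 signext %stride, <3 x i1> %m, i32 zeroext %evl) {
  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %m, i32 %evl)
  ret <3 x double> %v
}

declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*, i32, <3 x i1>, i32)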

6 files changed: 215 additions and 0 deletions

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 2 additions & 0 deletions
@@ -947,6 +947,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N);
+  SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N);
   SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N);
   SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N);
   SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N);
@@ -984,6 +985,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
   SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo);
+  SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo);
   SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
   SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 65 additions & 0 deletions
@@ -3687,6 +3687,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VP_LOAD:
     Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
     break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
+    Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
+    break;
   case ISD::MLOAD:
     Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
     break;
@@ -4919,6 +4922,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
   return Res;
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
+  SDLoc DL(N);
+
+  // The mask should be widened as well
+  SDValue Mask = N->getMask();
+  assert(getTypeAction(Mask.getValueType()) ==
+             TargetLowering::TypeWidenVector &&
+         "Unable to widen VP strided load");
+  Mask = GetWidenedVector(Mask);
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  assert(Mask.getValueType().getVectorElementCount() ==
+             WidenVT.getVectorElementCount() &&
+         "Data and mask vectors should have the same number of elements");
+
+  SDValue Res = DAG.getStridedLoadVP(
+      N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
+      N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
+      N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
+      N->isExpandingLoad());
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+  return Res;
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
 
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0));
@@ -5436,6 +5466,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE: Res = WidenVecOp_STORE(N); break;
   case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
+  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+    Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
+    break;
   case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
   case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
   case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
@@ -5914,6 +5947,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
                         ST->isCompressingStore());
 }
 
+SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
+                                                      unsigned OpNo) {
+  assert((OpNo == 1 || OpNo == 4) &&
+         "Can widen only data or mask operand of vp_strided_store");
+  VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
+  SDValue Mask = SST->getMask();
+  SDValue StVal = SST->getValue();
+  SDLoc DL(N);
+
+  if (OpNo == 1)
+    assert(getTypeAction(Mask.getValueType()) ==
+               TargetLowering::TypeWidenVector &&
+           "Unable to widen VP strided store");
+  else
+    assert(getTypeAction(StVal.getValueType()) ==
+               TargetLowering::TypeWidenVector &&
+           "Unable to widen VP strided store");
+
+  StVal = GetWidenedVector(StVal);
+  Mask = GetWidenedVector(Mask);
+
+  assert(StVal.getValueType().getVectorElementCount() ==
+             Mask.getValueType().getVectorElementCount() &&
+         "Data and mask vectors should have the same number of elements");
+
+  return DAG.getStridedStoreVP(
+      SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
+      SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
+      SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
+      SST->isCompressingStore());
+}
+
 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
   assert((OpNo == 1 || OpNo == 3) &&
          "Can widen only data or mask operand of mstore");

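The store path follows the same recipe as the load path: the data and mask operands are widened together and the original EVL operand is reused. A minimal IR sketch of the kind of operation that now reaches WidenVecOp_VP_STRIDED_STORE (illustrative only; the function name @widen_store_sketch is made up, while the intrinsic and types mirror the fixed-vector store tests below):

; Illustrative sketch (assumed function name): the <3 x float> data and the
; <3 x i1> mask operands are widened to 4 elements; %evl is passed through,
; so the widened store still writes at most %evl elements.
define void @widen_store_sketch(<3 x float> %v, float* %ptr, i32 signext %stride, <3 x i1> %m, i32 zeroext %evl) {
  call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %m, i32 %evl)
  ret void
}

declare void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float>, float*, i32, <3 x i1>, i32)
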
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll

Lines changed: 37 additions & 0 deletions
@@ -563,3 +563,40 @@ define <8 x double> @strided_vpload_v8f64(double* %ptr, i32 signext %stride, <8
   %load = call <8 x double> @llvm.experimental.vp.strided.load.v8f64.p0f64.i32(double* %ptr, i32 %stride, <8 x i1> %m, i32 %evl)
   ret <8 x double> %load
 }
+
+; Widening
+define <3 x double> @strided_vpload_v3f64(double* %ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_v3f64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_v3f64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
+  ret <3 x double> %v
+}
+
+define <3 x double> @strided_vpload_v3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_v3f64_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_v3f64_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
+  %v = call <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+  ret <3 x double> %v
+}
+
+declare <3 x double> @llvm.experimental.vp.strided.load.v3f64.p0f64.i32(double*, i32, <3 x i1>, i32)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll

Lines changed: 37 additions & 0 deletions
@@ -455,3 +455,40 @@ define void @strided_vpstore_v2i8_allones_mask(<2 x i8> %val, i8* %ptr, i32 sign
   call void @llvm.experimental.vp.strided.store.v2i8.p0i8.i32(<2 x i8> %val, i8* %ptr, i32 %stride, <2 x i1> %b, i32 %evl)
   ret void
 }
+
+; Widening
+define void @strided_vpstore_v3f32(<3 x float> %v, float *%ptr, i32 signext %stride, <3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_v3f32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_v3f32:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %mask, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_v3f32_allones_mask(<3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_v3f32_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_v3f32_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <3 x i1> %one, <3 x i1> poison, <3 x i32> zeroinitializer
+  call void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float> %v, float* %ptr, i32 %stride, <3 x i1> %allones, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.v3f32.p0f32.i32(<3 x float>, float*, i32, <3 x i1>, i32)

llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll

Lines changed: 37 additions & 0 deletions
@@ -723,3 +723,40 @@ define <vscale x 8 x double> @strided_vpload_nxv8f64(double* %ptr, i32 signext %
   %load = call <vscale x 8 x double> @llvm.experimental.vp.strided.load.nxv8f64.p0f64.i32(double* %ptr, i32 signext %stride, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %load
 }
+
+; Widening
+define <vscale x 3 x double> @strided_vpload_nxv3f64(double* %ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_nxv3f64:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_nxv3f64:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
+  ret <vscale x 3 x double> %v
+}
+
+define <vscale x 3 x double> @strided_vpload_nxv3f64_allones_mask(double* %ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpload_nxv3f64_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV32-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpload_nxv3f64_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
+; CHECK-RV64-NEXT:    vlse64.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl)
+  ret <vscale x 3 x double> %v
+}
+
+declare <vscale x 3 x double> @llvm.experimental.vp.strided.load.nxv3f64.p0f64.i32(double*, i32, <vscale x 3 x i1>, i32)

llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll

Lines changed: 37 additions & 0 deletions
@@ -579,3 +579,40 @@ define void @strided_vpstore_nxv1i8_allones_mask(<vscale x 1 x i8> %val, i8* %pt
   call void @llvm.experimental.vp.strided.store.nxv1i8.p0i8.i32(<vscale x 1 x i8> %val, i8* %ptr, i32 %strided, <vscale x 1 x i1> %b, i32 %evl)
   ret void
 }
+
+; Widening
+define void @strided_vpstore_nxv3f32(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, <vscale x 3 x i1> %mask, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_nxv3f32:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_nxv3f32:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1, v0.t
+; CHECK-RV64-NEXT:    ret
+  call void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %mask, i32 %evl)
+  ret void
+}
+
+define void @strided_vpstore_nxv3f32_allones_mask(<vscale x 3 x float> %v, float *%ptr, i32 signext %stride, i32 zeroext %evl) {
+; CHECK-RV32-LABEL: strided_vpstore_nxv3f32_allones_mask:
+; CHECK-RV32:       # %bb.0:
+; CHECK-RV32-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV32-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV32-NEXT:    ret
+;
+; CHECK-RV64-LABEL: strided_vpstore_nxv3f32_allones_mask:
+; CHECK-RV64:       # %bb.0:
+; CHECK-RV64-NEXT:    vsetvli zero, a2, e32, m2, ta, mu
+; CHECK-RV64-NEXT:    vsse32.v v8, (a0), a1
+; CHECK-RV64-NEXT:    ret
+  %one = insertelement <vscale x 3 x i1> poison, i1 true, i32 0
+  %allones = shufflevector <vscale x 3 x i1> %one, <vscale x 3 x i1> poison, <vscale x 3 x i32> zeroinitializer
+  call void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float> %v, float* %ptr, i32 %stride, <vscale x 3 x i1> %allones, i32 %evl)
+  ret void
+}
+
+declare void @llvm.experimental.vp.strided.store.nxv3f32.p0f32.i32(<vscale x 3 x float>, float*, i32, <vscale x 3 x i1>, i32)
