Skip to content

Commit 5cc6e53

Browse files
fixup! [InstCombine][RISCV] Convert VPIntrinsics with splat operands to splats of the scalar operation
Respond to Craig's comments
1 parent 2b96e3f commit 5cc6e53

File tree

2 files changed

+39
-25
lines changed

2 files changed

+39
-25
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -781,18 +781,16 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
781781
// Determine scalar opcode
782782
std::optional<unsigned> FunctionalOpcode =
783783
VPI.getFunctionalOpcode();
784-
bool ScalarIsIntr = false;
785-
std::optional<Intrinsic::ID> ScalarIntrID;
784+
std::optional<Intrinsic::ID> ScalarIntrID = std::nullopt;
786785
if (!FunctionalOpcode) {
787786
ScalarIntrID = VPI.getFunctionalIntrinsicID();
788787
if (!ScalarIntrID)
789788
return false;
790-
ScalarIsIntr = true;
791789
}
792790

793791
// Calculate cost of scalarizing
794792
InstructionCost ScalarOpCost = 0;
795-
if (ScalarIsIntr) {
793+
if (ScalarIntrID) {
796794
IntrinsicCostAttributes Attrs(*ScalarIntrID, VecTy->getScalarType(), Args);
797795
ScalarOpCost = TTI.getIntrinsicInstrCost(Attrs, CostKind);
798796
} else {
@@ -818,17 +816,15 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
818816
ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
819817
Value *EVL = VPI.getArgOperand(3);
820818
const DataLayout &DL = VPI.getModule()->getDataLayout();
821-
bool IsKnownNonZeroVL = isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT);
822819
bool MustHaveNonZeroVL =
823820
IntrID == Intrinsic::vp_sdiv || IntrID == Intrinsic::vp_udiv ||
824-
IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem ||
825-
IntrID == Intrinsic::vp_fdiv || IntrID == Intrinsic::vp_frem;
821+
IntrID == Intrinsic::vp_srem || IntrID == Intrinsic::vp_urem;
826822

827-
if ((MustHaveNonZeroVL && IsKnownNonZeroVL) || !MustHaveNonZeroVL) {
823+
if (!MustHaveNonZeroVL || isKnownNonZero(EVL, DL, 0, &AC, &VPI, &DT)) {
828824
Value *ScalarOp0 = getSplatValue(Op0);
829825
Value *ScalarOp1 = getSplatValue(Op1);
830826
Value *ScalarVal =
831-
ScalarIsIntr
827+
ScalarIntrID
832828
? Builder.CreateIntrinsic(VecTy->getScalarType(), *ScalarIntrID,
833829
{ScalarOp0, ScalarOp1})
834830
: Builder.CreateBinOp((Instruction::BinaryOps)(*FunctionalOpcode),

llvm/test/Transforms/VectorCombine/RISCV/vpintrin-scalarization.ll

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -975,14 +975,23 @@ define <vscale x 1 x float> @fsub_nxv1f32_anymask(<vscale x 1 x float> %x, float
975975
}
976976

977977
define <vscale x 1 x float> @fdiv_nxv1f32_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
978-
; ALL-LABEL: @fdiv_nxv1f32_allonesmask(
979-
; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
980-
; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
981-
; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0
982-
; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
983-
; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
984-
; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
985-
; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]]
978+
; VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask(
979+
; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
980+
; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
981+
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = fdiv float [[Y:%.*]], 4.200000e+01
982+
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x float> poison, float [[TMP1]], i64 0
983+
; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[DOTSPLATINSERT]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
984+
; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
985+
; VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP3]]
986+
;
987+
; NO-VEC-COMBINE-LABEL: @fdiv_nxv1f32_allonesmask(
988+
; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
989+
; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
990+
; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0
991+
; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
992+
; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
993+
; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
994+
; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]]
986995
;
987996
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
988997
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
@@ -1009,14 +1018,23 @@ define <vscale x 1 x float> @fdiv_nxv1f32_anymask(<vscale x 1 x float> %x, float
10091018
}
10101019

10111020
define <vscale x 1 x float> @frem_nxv1f32_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
1012-
; ALL-LABEL: @frem_nxv1f32_allonesmask(
1013-
; ALL-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
1014-
; ALL-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1015-
; ALL-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0
1016-
; ALL-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1017-
; ALL-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
1018-
; ALL-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
1019-
; ALL-NEXT: ret <vscale x 1 x float> [[TMP4]]
1021+
; VEC-COMBINE-LABEL: @frem_nxv1f32_allonesmask(
1022+
; VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
1023+
; VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1024+
; VEC-COMBINE-NEXT: [[TMP1:%.*]] = frem float [[Y:%.*]], 4.200000e+01
1025+
; VEC-COMBINE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 1 x float> poison, float [[TMP1]], i64 0
1026+
; VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[DOTSPLATINSERT]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1027+
; VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP2]], <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
1028+
; VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP3]]
1029+
;
1030+
; NO-VEC-COMBINE-LABEL: @frem_nxv1f32_allonesmask(
1031+
; NO-VEC-COMBINE-NEXT: [[SPLAT:%.*]] = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
1032+
; NO-VEC-COMBINE-NEXT: [[MASK:%.*]] = shufflevector <vscale x 1 x i1> [[SPLAT]], <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
1033+
; NO-VEC-COMBINE-NEXT: [[TMP1:%.*]] = insertelement <vscale x 1 x float> poison, float [[Y:%.*]], i64 0
1034+
; NO-VEC-COMBINE-NEXT: [[TMP2:%.*]] = shufflevector <vscale x 1 x float> [[TMP1]], <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
1035+
; NO-VEC-COMBINE-NEXT: [[TMP3:%.*]] = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> [[TMP2]], <vscale x 1 x float> shufflevector (<vscale x 1 x float> insertelement (<vscale x 1 x float> poison, float 4.200000e+01, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> [[MASK]], i32 [[EVL:%.*]])
1036+
; NO-VEC-COMBINE-NEXT: [[TMP4:%.*]] = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> [[X:%.*]], <vscale x 1 x float> [[TMP3]], <vscale x 1 x i1> [[MASK]], i32 [[EVL]])
1037+
; NO-VEC-COMBINE-NEXT: ret <vscale x 1 x float> [[TMP4]]
10201038
;
10211039
%splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
10221040
%mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer

0 commit comments

Comments
 (0)