Skip to content

Commit cf4df61

Browse files
[SVE] Add intrinsics for floating-point operations that explicitly undefine the result for inactive lanes.
This patch is the floating-point equivalent of D141937.

Depends on D143764.

Differential Revision: https://reviews.llvm.org/D143765
1 parent 83e9ef7 commit cf4df61

File tree

4 files changed

+877
-8
lines changed

4 files changed

+877
-8
lines changed

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1931,31 +1931,44 @@ def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
19311931
//
19321932

19331933
def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
1934+
def int_aarch64_sve_fabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19341935
def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic;
19351936
def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
1937+
def int_aarch64_sve_fadd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19361938
def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
19371939
def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
19381940
def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
19391941
def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
1942+
def int_aarch64_sve_fdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19401943
def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
19411944
def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic;
19421945
def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
19431946
def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
1947+
def int_aarch64_sve_fmax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19441948
def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
1949+
def int_aarch64_sve_fmaxnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19451950
def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
1951+
def int_aarch64_sve_fmin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19461952
def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
1953+
def int_aarch64_sve_fminnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19471954
def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
19481955
def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
1956+
def int_aarch64_sve_fmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
19491957
def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
19501958
def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
1959+
def int_aarch64_sve_fmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
19511960
def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
19521961
def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
1962+
def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
1963+
def int_aarch64_sve_fmul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19531964
def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
1965+
def int_aarch64_sve_fmulx_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19541966
def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic;
1955-
def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
19561967
def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
19571968
def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
1969+
def int_aarch64_sve_fnmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
19581970
def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
1971+
def int_aarch64_sve_fnmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
19591972
def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
19601973
def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic;
19611974
def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic;
@@ -1972,6 +1985,7 @@ def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic;
19721985
def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
19731986
def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic;
19741987
def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
1988+
def int_aarch64_sve_fsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
19751989
def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
19761990
def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
19771991
def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18337,10 +18337,38 @@ static SDValue performIntrinsicCombine(SDNode *N,
1833718337
N->getOperand(1), N->getOperand(2), N->getOperand(3));
1833818338
case Intrinsic::aarch64_sve_fadd:
1833918339
return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
18340-
case Intrinsic::aarch64_sve_fsub:
18341-
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
18340+
case Intrinsic::aarch64_sve_fadd_u:
18341+
return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
18342+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18343+
case Intrinsic::aarch64_sve_fdiv_u:
18344+
return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
18345+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18346+
case Intrinsic::aarch64_sve_fmax_u:
18347+
return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
18348+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18349+
case Intrinsic::aarch64_sve_fmaxnm_u:
18350+
return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
18351+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18352+
case Intrinsic::aarch64_sve_fmla_u:
18353+
return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
18354+
N->getOperand(1), N->getOperand(3), N->getOperand(4),
18355+
N->getOperand(2));
18356+
case Intrinsic::aarch64_sve_fmin_u:
18357+
return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
18358+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18359+
case Intrinsic::aarch64_sve_fminnm_u:
18360+
return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
18361+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
1834218362
case Intrinsic::aarch64_sve_fmul:
1834318363
return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
18364+
case Intrinsic::aarch64_sve_fmul_u:
18365+
return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
18366+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
18367+
case Intrinsic::aarch64_sve_fsub:
18368+
return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
18369+
case Intrinsic::aarch64_sve_fsub_u:
18370+
return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
18371+
N->getOperand(1), N->getOperand(2), N->getOperand(3));
1834418372
case Intrinsic::aarch64_sve_add:
1834518373
return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
1834618374
case Intrinsic::aarch64_sve_add_u:

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -352,24 +352,28 @@ def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
352352
}]>;
353353

354354

355-
def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
356-
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
355+
def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2),
356+
[(int_aarch64_sve_fabd_u node:$pg, node:$op1, node:$op2),
357+
(AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>;
357358

358359
def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
359360
[(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za),
360361
(vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), node:$zn, node:$zm, node:$za), node:$za)]>;
361362

362363
def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
363-
[(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
364+
[(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm),
365+
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
364366
(AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za),
365367
(vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), (AArch64fneg_mt (AArch64ptrue 31), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>;
366368

367369
def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
368-
[(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
370+
[(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
371+
(AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
369372
(AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>;
370373

371374
def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
372-
[(AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>;
375+
[(int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm),
376+
(AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>;
373377

374378
def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
375379
(AArch64fsub_p node:$pg, node:$op2, node:$op1)>;
@@ -623,6 +627,7 @@ let Predicates = [HasSVEorSME] in {
623627
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
624628
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
625629
defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>;
630+
defm FMULX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_fmulx_u>;
626631
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
627632
} // End HasSVEorSME
628633

0 commit comments

Comments
 (0)