Skip to content

Commit 1bb3488

Browse files
committed
[ARM] Predicated VFMA patterns
Similar to the two-op + select patterns that were added recently, this adds patterns that match a select of an fma against its unmodified accumulator/destination operand and turn it into a single predicated VFMA/VFMS operation. Differential Revision: https://reviews.llvm.org/D85824
1 parent 13d6cf0 commit 1bb3488

File tree

2 files changed

+44
-40
lines changed

2 files changed

+44
-40
lines changed

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3652,13 +3652,25 @@ multiclass MVE_VFMA_fp_multi<string iname, bit fms, MVEVectorVTInfo VTI> {
36523652
(Inst $add, $m1, $m2)>;
36533653
def : Pat<(VTI.Vec (fma m1, (fneg m2), add)),
36543654
(Inst $add, $m1, $m2)>;
3655+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
3656+
(VTI.Vec (fma (fneg m1), m2, add)),
3657+
add)),
3658+
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
3659+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
3660+
(VTI.Vec (fma m1, (fneg m2), add)),
3661+
add)),
3662+
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
36553663
def : Pat<(VTI.Vec (pred_int (fneg m1), m2, add, pred)),
36563664
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
36573665
def : Pat<(VTI.Vec (pred_int m1, (fneg m2), add, pred)),
36583666
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
36593667
} else {
36603668
def : Pat<(VTI.Vec (fma m1, m2, add)),
36613669
(Inst $add, $m1, $m2)>;
3670+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
3671+
(VTI.Vec (fma m1, m2, add)),
3672+
add)),
3673+
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
36623674
def : Pat<(VTI.Vec (pred_int m1, m2, add, pred)),
36633675
(Inst $add, $m1, $m2, ARMVCCThen, $pred)>;
36643676
}
@@ -5538,13 +5550,25 @@ multiclass MVE_VFMA_qr_multi<string iname, MVEVectorVTInfo VTI,
55385550
if scalar_addend then {
55395551
def : Pat<(VTI.Vec (fma v1, v2, vs)),
55405552
(VTI.Vec (Inst v1, v2, is))>;
5553+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5554+
(VTI.Vec (fma v1, v2, vs)),
5555+
v1)),
5556+
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
55415557
def : Pat<(VTI.Vec (pred_int v1, v2, vs, pred)),
55425558
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, pred))>;
55435559
} else {
55445560
def : Pat<(VTI.Vec (fma v1, vs, v2)),
55455561
(VTI.Vec (Inst v2, v1, is))>;
55465562
def : Pat<(VTI.Vec (fma vs, v1, v2)),
55475563
(VTI.Vec (Inst v2, v1, is))>;
5564+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5565+
(VTI.Vec (fma vs, v2, v1)),
5566+
v1)),
5567+
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
5568+
def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
5569+
(VTI.Vec (fma v2, vs, v1)),
5570+
v1)),
5571+
(VTI.Vec (Inst v1, v2, is, ARMVCCThen, $pred))>;
55485572
def : Pat<(VTI.Vec (pred_int v1, vs, v2, pred)),
55495573
(VTI.Vec (Inst v2, v1, is, ARMVCCThen, pred))>;
55505574
def : Pat<(VTI.Vec (pred_int vs, v1, v2, pred)),

llvm/test/CodeGen/Thumb2/mve-fmas.ll

Lines changed: 20 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -481,10 +481,8 @@ define arm_aapcs_vfpcc <8 x half> @vfma16_v1_pred(<8 x half> %src1, <8 x half> %
481481
;
482482
; CHECK-MVE-VMLA-LABEL: vfma16_v1_pred:
483483
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
484-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
485-
; CHECK-MVE-VMLA-NEXT: vcmp.f16 lt, q1, zr
486-
; CHECK-MVE-VMLA-NEXT: vfma.f16 q3, q1, q2
487-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
484+
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
485+
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
488486
; CHECK-MVE-VMLA-NEXT: bx lr
489487
;
490488
; CHECK-MVE-LABEL: vfma16_v1_pred:
@@ -628,10 +626,8 @@ define arm_aapcs_vfpcc <8 x half> @vfma16_v2_pred(<8 x half> %src1, <8 x half> %
628626
;
629627
; CHECK-MVE-VMLA-LABEL: vfma16_v2_pred:
630628
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
631-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
632-
; CHECK-MVE-VMLA-NEXT: vcmp.f16 lt, q1, zr
633-
; CHECK-MVE-VMLA-NEXT: vfma.f16 q3, q1, q2
634-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
629+
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
630+
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, q2
635631
; CHECK-MVE-VMLA-NEXT: bx lr
636632
;
637633
; CHECK-MVE-LABEL: vfma16_v2_pred:
@@ -775,10 +771,8 @@ define arm_aapcs_vfpcc <8 x half> @vfms16_pred(<8 x half> %src1, <8 x half> %src
775771
;
776772
; CHECK-MVE-VMLA-LABEL: vfms16_pred:
777773
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
778-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
779-
; CHECK-MVE-VMLA-NEXT: vcmp.f16 lt, q1, zr
780-
; CHECK-MVE-VMLA-NEXT: vfms.f16 q3, q1, q2
781-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
774+
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
775+
; CHECK-MVE-VMLA-NEXT: vfmst.f16 q0, q1, q2
782776
; CHECK-MVE-VMLA-NEXT: bx lr
783777
;
784778
; CHECK-MVE-LABEL: vfms16_pred:
@@ -926,11 +920,9 @@ define arm_aapcs_vfpcc <8 x half> @vfmar16_pred(<8 x half> %src1, <8 x half> %sr
926920
; CHECK-MVE-VMLA-LABEL: vfmar16_pred:
927921
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
928922
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
929-
; CHECK-MVE-VMLA-NEXT: vcmp.f16 lt, q1, zr
930923
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
931-
; CHECK-MVE-VMLA-NEXT: vmov q2, q0
932-
; CHECK-MVE-VMLA-NEXT: vfma.f16 q2, q1, r0
933-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q2, q0
924+
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
925+
; CHECK-MVE-VMLA-NEXT: vfmat.f16 q0, q1, r0
934926
; CHECK-MVE-VMLA-NEXT: bx lr
935927
;
936928
; CHECK-MVE-LABEL: vfmar16_pred:
@@ -1074,11 +1066,9 @@ define arm_aapcs_vfpcc <8 x half> @vfma16_pred(<8 x half> %src1, <8 x half> %src
10741066
; CHECK-MVE-VMLA-LABEL: vfma16_pred:
10751067
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
10761068
; CHECK-MVE-VMLA-NEXT: vcvtb.f16.f32 s8, s8
1077-
; CHECK-MVE-VMLA-NEXT: vcmp.f16 lt, q1, zr
10781069
; CHECK-MVE-VMLA-NEXT: vmov.f16 r0, s8
1079-
; CHECK-MVE-VMLA-NEXT: vmov q2, q0
1080-
; CHECK-MVE-VMLA-NEXT: vfmas.f16 q2, q1, r0
1081-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q2, q0
1070+
; CHECK-MVE-VMLA-NEXT: vpt.f16 lt, q1, zr
1071+
; CHECK-MVE-VMLA-NEXT: vfmast.f16 q0, q1, r0
10821072
; CHECK-MVE-VMLA-NEXT: bx lr
10831073
;
10841074
; CHECK-MVE-LABEL: vfma16_pred:
@@ -1218,10 +1208,8 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v1_pred(<4 x float> %src1, <4 x float
12181208
;
12191209
; CHECK-MVE-VMLA-LABEL: vfma32_v1_pred:
12201210
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
1221-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
1222-
; CHECK-MVE-VMLA-NEXT: vcmp.f32 lt, q1, zr
1223-
; CHECK-MVE-VMLA-NEXT: vfma.f32 q3, q1, q2
1224-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
1211+
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1212+
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
12251213
; CHECK-MVE-VMLA-NEXT: bx lr
12261214
;
12271215
; CHECK-MVE-LABEL: vfma32_v1_pred:
@@ -1290,10 +1278,8 @@ define arm_aapcs_vfpcc <4 x float> @vfma32_v2_pred(<4 x float> %src1, <4 x float
12901278
;
12911279
; CHECK-MVE-VMLA-LABEL: vfma32_v2_pred:
12921280
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
1293-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
1294-
; CHECK-MVE-VMLA-NEXT: vcmp.f32 lt, q1, zr
1295-
; CHECK-MVE-VMLA-NEXT: vfma.f32 q3, q1, q2
1296-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
1281+
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1282+
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, q2
12971283
; CHECK-MVE-VMLA-NEXT: bx lr
12981284
;
12991285
; CHECK-MVE-LABEL: vfma32_v2_pred:
@@ -1362,10 +1348,8 @@ define arm_aapcs_vfpcc <4 x float> @vfms32_pred(<4 x float> %src1, <4 x float> %
13621348
;
13631349
; CHECK-MVE-VMLA-LABEL: vfms32_pred:
13641350
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
1365-
; CHECK-MVE-VMLA-NEXT: vmov q3, q0
1366-
; CHECK-MVE-VMLA-NEXT: vcmp.f32 lt, q1, zr
1367-
; CHECK-MVE-VMLA-NEXT: vfms.f32 q3, q1, q2
1368-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q3, q0
1351+
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1352+
; CHECK-MVE-VMLA-NEXT: vfmst.f32 q0, q1, q2
13691353
; CHECK-MVE-VMLA-NEXT: bx lr
13701354
;
13711355
; CHECK-MVE-LABEL: vfms32_pred:
@@ -1437,10 +1421,8 @@ define arm_aapcs_vfpcc <4 x float> @vfmar32_pred(<4 x float> %src1, <4 x float>
14371421
; CHECK-MVE-VMLA-LABEL: vfmar32_pred:
14381422
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
14391423
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
1440-
; CHECK-MVE-VMLA-NEXT: vmov q2, q0
1441-
; CHECK-MVE-VMLA-NEXT: vcmp.f32 lt, q1, zr
1442-
; CHECK-MVE-VMLA-NEXT: vfma.f32 q2, q1, r0
1443-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q2, q0
1424+
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1425+
; CHECK-MVE-VMLA-NEXT: vfmat.f32 q0, q1, r0
14441426
; CHECK-MVE-VMLA-NEXT: bx lr
14451427
;
14461428
; CHECK-MVE-LABEL: vfmar32_pred:
@@ -1513,10 +1495,8 @@ define arm_aapcs_vfpcc <4 x float> @vfmas32_pred(<4 x float> %src1, <4 x float>
15131495
; CHECK-MVE-VMLA-LABEL: vfmas32_pred:
15141496
; CHECK-MVE-VMLA: @ %bb.0: @ %entry
15151497
; CHECK-MVE-VMLA-NEXT: vmov r0, s8
1516-
; CHECK-MVE-VMLA-NEXT: vmov q2, q0
1517-
; CHECK-MVE-VMLA-NEXT: vcmp.f32 lt, q1, zr
1518-
; CHECK-MVE-VMLA-NEXT: vfmas.f32 q2, q1, r0
1519-
; CHECK-MVE-VMLA-NEXT: vpsel q0, q2, q0
1498+
; CHECK-MVE-VMLA-NEXT: vpt.f32 lt, q1, zr
1499+
; CHECK-MVE-VMLA-NEXT: vfmast.f32 q0, q1, r0
15201500
; CHECK-MVE-VMLA-NEXT: bx lr
15211501
;
15221502
; CHECK-MVE-LABEL: vfmas32_pred:

0 commit comments

Comments
 (0)