Skip to content

Commit 1e1781b

Browse files
[LLVM][SVE] Improve code generation for i1 based int_to_fp operations. (#129229)
Rather than extending the predicate, we can use it directly to select between the two possible results.
1 parent ec54ec6 commit 1e1781b

File tree

4 files changed

+59
-65
lines changed

4 files changed

+59
-65
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5036,11 +5036,10 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
50365036

50375037
if (VT.isScalableVector()) {
50385038
if (InVT.getVectorElementType() == MVT::i1) {
5039-
// We can't directly extend an SVE predicate; extend it first.
5040-
unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5041-
EVT CastVT = getPromotedVTForPredicate(InVT);
5042-
In = DAG.getNode(CastOpc, dl, CastVT, In);
5043-
return DAG.getNode(Opc, dl, VT, In);
5039+
SDValue FalseVal = DAG.getConstantFP(0.0, dl, VT);
5040+
SDValue TrueVal = IsSigned ? DAG.getConstantFP(-1.0, dl, VT)
5041+
: DAG.getConstantFP(1.0, dl, VT);
5042+
return DAG.getNode(ISD::VSELECT, dl, VT, In, TrueVal, FalseVal);
50445043
}
50455044

50465045
unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5452,6 +5452,19 @@ multiclass sve_int_dup_fpimm_pred<string asm> {
54525452
(!cast<Instruction>(NAME # _S) ZPR32:$Zd, PPRAny:$Pg, fpimm32:$imm8), 1>;
54535453
def : InstAlias<"fmov $Zd, $Pg/m, $imm8",
54545454
(!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPRAny:$Pg, fpimm64:$imm8), 1>;
5455+
5456+
def : Pat<(nxv8f16 (vselect nxv8i1:$pg, (splat_vector fpimm16:$imm8), nxv8f16:$zd)),
5457+
(!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
5458+
def : Pat<(nxv4f16 (vselect nxv4i1:$pg, (splat_vector fpimm16:$imm8), nxv4f16:$zd)),
5459+
(!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
5460+
def : Pat<(nxv2f16 (vselect nxv2i1:$pg, (splat_vector fpimm16:$imm8), nxv2f16:$zd)),
5461+
(!cast<Instruction>(NAME # _H) $zd, $pg, fpimm16:$imm8)>;
5462+
def : Pat<(nxv4f32 (vselect nxv4i1:$pg, (splat_vector fpimm32:$imm8), nxv4f32:$zd)),
5463+
(!cast<Instruction>(NAME # _S) $zd, $pg, fpimm32:$imm8)>;
5464+
def : Pat<(nxv2f32 (vselect nxv2i1:$pg, (splat_vector fpimm32:$imm8), nxv2f32:$zd)),
5465+
(!cast<Instruction>(NAME # _S) $zd, $pg, fpimm32:$imm8)>;
5466+
def : Pat<(nxv2f64 (vselect nxv2i1:$pg, (splat_vector fpimm64:$imm8), nxv2f64:$zd)),
5467+
(!cast<Instruction>(NAME # _D) $zd, $pg, fpimm64:$imm8)>;
54555468
}
54565469

54575470
class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,

llvm/test/CodeGen/AArch64/sve-fcvt.ll

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -454,9 +454,8 @@ define <vscale x 2 x i64> @fcvtzu_d_nxv2f64(<vscale x 2 x double> %a) {
454454
define <vscale x 2 x half> @scvtf_h_nxv2i1(<vscale x 2 x i1> %a) {
455455
; CHECK-LABEL: scvtf_h_nxv2i1:
456456
; CHECK: // %bb.0:
457-
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
458-
; CHECK-NEXT: ptrue p0.d
459-
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
457+
; CHECK-NEXT: mov z0.h, #0 // =0x0
458+
; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
460459
; CHECK-NEXT: ret
461460
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x half>
462461
ret <vscale x 2 x half> %res
@@ -495,9 +494,8 @@ define <vscale x 2 x half> @scvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
495494
define <vscale x 3 x half> @scvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
496495
; CHECK-LABEL: scvtf_h_nxv3i1:
497496
; CHECK: // %bb.0:
498-
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
499-
; CHECK-NEXT: ptrue p0.s
500-
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
497+
; CHECK-NEXT: mov z0.h, #0 // =0x0
498+
; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
501499
; CHECK-NEXT: ret
502500
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
503501
ret <vscale x 3 x half> %res
@@ -516,9 +514,8 @@ define <vscale x 3 x half> @scvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
516514
define <vscale x 4 x half> @scvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
517515
; CHECK-LABEL: scvtf_h_nxv4i1:
518516
; CHECK: // %bb.0:
519-
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
520-
; CHECK-NEXT: ptrue p0.s
521-
; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
517+
; CHECK-NEXT: mov z0.h, #0 // =0x0
518+
; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
522519
; CHECK-NEXT: ret
523520
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x half>
524521
ret <vscale x 4 x half> %res
@@ -547,9 +544,8 @@ define <vscale x 4 x half> @scvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
547544
define <vscale x 7 x half> @scvtf_h_nxv7i1(<vscale x 7 x i1> %a) {
548545
; CHECK-LABEL: scvtf_h_nxv7i1:
549546
; CHECK: // %bb.0:
550-
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
551-
; CHECK-NEXT: ptrue p0.h
552-
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
547+
; CHECK-NEXT: mov z0.h, #0 // =0x0
548+
; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
553549
; CHECK-NEXT: ret
554550
%res = sitofp <vscale x 7 x i1> %a to <vscale x 7 x half>
555551
ret <vscale x 7 x half> %res
@@ -568,9 +564,8 @@ define <vscale x 7 x half> @scvtf_h_nxv7i16(<vscale x 7 x i16> %a) {
568564
define <vscale x 8 x half> @scvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
569565
; CHECK-LABEL: scvtf_h_nxv8i1:
570566
; CHECK: // %bb.0:
571-
; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
572-
; CHECK-NEXT: ptrue p0.h
573-
; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
567+
; CHECK-NEXT: mov z0.h, #0 // =0x0
568+
; CHECK-NEXT: fmov z0.h, p0/m, #-1.00000000
574569
; CHECK-NEXT: ret
575570
%res = sitofp <vscale x 8 x i1> %a to <vscale x 8 x half>
576571
ret <vscale x 8 x half> %res
@@ -589,9 +584,8 @@ define <vscale x 8 x half> @scvtf_h_nxv8i16(<vscale x 8 x i16> %a) {
589584
define <vscale x 2 x float> @scvtf_s_nxv2i1(<vscale x 2 x i1> %a) {
590585
; CHECK-LABEL: scvtf_s_nxv2i1:
591586
; CHECK: // %bb.0:
592-
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
593-
; CHECK-NEXT: ptrue p0.d
594-
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
587+
; CHECK-NEXT: mov z0.s, #0 // =0x0
588+
; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
595589
; CHECK-NEXT: ret
596590
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x float>
597591
ret <vscale x 2 x float> %res
@@ -620,9 +614,8 @@ define <vscale x 2 x float> @scvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
620614
define <vscale x 3 x float> @scvtf_s_nxv3i1(<vscale x 3 x i1> %a) {
621615
; CHECK-LABEL: scvtf_s_nxv3i1:
622616
; CHECK: // %bb.0:
623-
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
624-
; CHECK-NEXT: ptrue p0.s
625-
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
617+
; CHECK-NEXT: mov z0.s, #0 // =0x0
618+
; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
626619
; CHECK-NEXT: ret
627620
%res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x float>
628621
ret <vscale x 3 x float> %res
@@ -641,9 +634,8 @@ define <vscale x 3 x float> @scvtf_s_nxv3i32(<vscale x 3 x i32> %a) {
641634
define <vscale x 4 x float> @scvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
642635
; CHECK-LABEL: scvtf_s_nxv4i1:
643636
; CHECK: // %bb.0:
644-
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
645-
; CHECK-NEXT: ptrue p0.s
646-
; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
637+
; CHECK-NEXT: mov z0.s, #0 // =0x0
638+
; CHECK-NEXT: fmov z0.s, p0/m, #-1.00000000
647639
; CHECK-NEXT: ret
648640
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x float>
649641
ret <vscale x 4 x float> %res
@@ -662,9 +654,8 @@ define <vscale x 4 x float> @scvtf_s_nxv4i32(<vscale x 4 x i32> %a) {
662654
define <vscale x 2 x double> @scvtf_d_nxv2i1(<vscale x 2 x i1> %a) {
663655
; CHECK-LABEL: scvtf_d_nxv2i1:
664656
; CHECK: // %bb.0:
665-
; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
666-
; CHECK-NEXT: ptrue p0.d
667-
; CHECK-NEXT: scvtf z0.d, p0/m, z0.d
657+
; CHECK-NEXT: mov z0.d, #0 // =0x0
658+
; CHECK-NEXT: fmov z0.d, p0/m, #-1.00000000
668659
; CHECK-NEXT: ret
669660
%res = sitofp <vscale x 2 x i1> %a to <vscale x 2 x double>
670661
ret <vscale x 2 x double> %res
@@ -695,9 +686,8 @@ define <vscale x 2 x double> @scvtf_d_nxv2i64(<vscale x 2 x i64> %a) {
695686
define <vscale x 2 x half> @ucvtf_h_nxv2i1(<vscale x 2 x i1> %a) {
696687
; CHECK-LABEL: ucvtf_h_nxv2i1:
697688
; CHECK: // %bb.0:
698-
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
699-
; CHECK-NEXT: ptrue p0.d
700-
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
689+
; CHECK-NEXT: mov z0.h, #0 // =0x0
690+
; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
701691
; CHECK-NEXT: ret
702692
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x half>
703693
ret <vscale x 2 x half> %res
@@ -736,9 +726,8 @@ define <vscale x 2 x half> @ucvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
736726
define <vscale x 3 x half> @ucvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
737727
; CHECK-LABEL: ucvtf_h_nxv3i1:
738728
; CHECK: // %bb.0:
739-
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
740-
; CHECK-NEXT: ptrue p0.s
741-
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
729+
; CHECK-NEXT: mov z0.h, #0 // =0x0
730+
; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
742731
; CHECK-NEXT: ret
743732
%res = uitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
744733
ret <vscale x 3 x half> %res
@@ -767,9 +756,8 @@ define <vscale x 3 x half> @ucvtf_h_nxv3i32(<vscale x 3 x i32> %a) {
767756
define <vscale x 4 x half> @ucvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
768757
; CHECK-LABEL: ucvtf_h_nxv4i1:
769758
; CHECK: // %bb.0:
770-
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
771-
; CHECK-NEXT: ptrue p0.s
772-
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
759+
; CHECK-NEXT: mov z0.h, #0 // =0x0
760+
; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
773761
; CHECK-NEXT: ret
774762
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x half>
775763
ret <vscale x 4 x half> %res
@@ -798,9 +786,8 @@ define <vscale x 4 x half> @ucvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
798786
define <vscale x 8 x half> @ucvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
799787
; CHECK-LABEL: ucvtf_h_nxv8i1:
800788
; CHECK: // %bb.0:
801-
; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1
802-
; CHECK-NEXT: ptrue p0.h
803-
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
789+
; CHECK-NEXT: mov z0.h, #0 // =0x0
790+
; CHECK-NEXT: fmov z0.h, p0/m, #1.00000000
804791
; CHECK-NEXT: ret
805792
%res = uitofp <vscale x 8 x i1> %a to <vscale x 8 x half>
806793
ret <vscale x 8 x half> %res
@@ -819,9 +806,8 @@ define <vscale x 8 x half> @ucvtf_h_nxv8i16(<vscale x 8 x i16> %a) {
819806
define <vscale x 2 x float> @ucvtf_s_nxv2i1(<vscale x 2 x i1> %a) {
820807
; CHECK-LABEL: ucvtf_s_nxv2i1:
821808
; CHECK: // %bb.0:
822-
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
823-
; CHECK-NEXT: ptrue p0.d
824-
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
809+
; CHECK-NEXT: mov z0.s, #0 // =0x0
810+
; CHECK-NEXT: fmov z0.s, p0/m, #1.00000000
825811
; CHECK-NEXT: ret
826812
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x float>
827813
ret <vscale x 2 x float> %res
@@ -850,9 +836,8 @@ define <vscale x 2 x float> @ucvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
850836
define <vscale x 4 x float> @ucvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
851837
; CHECK-LABEL: ucvtf_s_nxv4i1:
852838
; CHECK: // %bb.0:
853-
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
854-
; CHECK-NEXT: ptrue p0.s
855-
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s
839+
; CHECK-NEXT: mov z0.s, #0 // =0x0
840+
; CHECK-NEXT: fmov z0.s, p0/m, #1.00000000
856841
; CHECK-NEXT: ret
857842
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x float>
858843
ret <vscale x 4 x float> %res
@@ -871,9 +856,8 @@ define <vscale x 4 x float> @ucvtf_s_nxv4i32(<vscale x 4 x i32> %a) {
871856
define <vscale x 2 x double> @ucvtf_d_nxv2i1(<vscale x 2 x i1> %a) {
872857
; CHECK-LABEL: ucvtf_d_nxv2i1:
873858
; CHECK: // %bb.0:
874-
; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1
875-
; CHECK-NEXT: ptrue p0.d
876-
; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d
859+
; CHECK-NEXT: mov z0.d, #0 // =0x0
860+
; CHECK-NEXT: fmov z0.d, p0/m, #1.00000000
877861
; CHECK-NEXT: ret
878862
%res = uitofp <vscale x 2 x i1> %a to <vscale x 2 x double>
879863
ret <vscale x 2 x double> %res

llvm/test/CodeGen/AArch64/sve-split-fcvt.ll

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -331,13 +331,12 @@ define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
331331
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
332332
; CHECK-LABEL: scvtf_d_nxv4i1:
333333
; CHECK: // %bb.0:
334-
; CHECK-NEXT: punpklo p2.h, p0.b
334+
; CHECK-NEXT: mov z1.d, #0 // =0x0
335+
; CHECK-NEXT: punpklo p1.h, p0.b
335336
; CHECK-NEXT: punpkhi p0.h, p0.b
336-
; CHECK-NEXT: mov z0.d, p2/z, #-1 // =0xffffffffffffffff
337-
; CHECK-NEXT: ptrue p1.d
338-
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
339-
; CHECK-NEXT: scvtf z0.d, p1/m, z0.d
340-
; CHECK-NEXT: scvtf z1.d, p1/m, z1.d
337+
; CHECK-NEXT: mov z0.d, z1.d
338+
; CHECK-NEXT: fmov z1.d, p0/m, #-1.00000000
339+
; CHECK-NEXT: fmov z0.d, p1/m, #-1.00000000
341340
; CHECK-NEXT: ret
342341
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
343342
ret <vscale x 4 x double> %res
@@ -393,13 +392,12 @@ define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
393392
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
394393
; CHECK-LABEL: ucvtf_d_nxv4i1:
395394
; CHECK: // %bb.0:
396-
; CHECK-NEXT: punpklo p2.h, p0.b
395+
; CHECK-NEXT: mov z1.d, #0 // =0x0
396+
; CHECK-NEXT: punpklo p1.h, p0.b
397397
; CHECK-NEXT: punpkhi p0.h, p0.b
398-
; CHECK-NEXT: mov z0.d, p2/z, #1 // =0x1
399-
; CHECK-NEXT: ptrue p1.d
400-
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
401-
; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d
402-
; CHECK-NEXT: ucvtf z1.d, p1/m, z1.d
398+
; CHECK-NEXT: mov z0.d, z1.d
399+
; CHECK-NEXT: fmov z1.d, p0/m, #1.00000000
400+
; CHECK-NEXT: fmov z0.d, p1/m, #1.00000000
403401
; CHECK-NEXT: ret
404402
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
405403
ret <vscale x 4 x double> %res

0 commit comments

Comments
 (0)