Skip to content

Commit a60a542

Browse files
committed
Reland "[LoongArch] Support CTLZ with lsx/lasx"
This patch also adds tests for `CTPOP`. It relands 07cec73 with the tests fixed.
1 parent 63e6bba commit a60a542

File tree

5 files changed

+255
-10
lines changed

5 files changed

+255
-10
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
256256
VT, Legal);
257257
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
258258
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
259-
setOperationAction(ISD::CTPOP, VT, Legal);
259+
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
260260
}
261261
for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
262262
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
@@ -286,7 +286,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
286286
VT, Legal);
287287
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
288288
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
289-
setOperationAction(ISD::CTPOP, VT, Legal);
289+
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
290290
}
291291
for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
292292
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
@@ -2827,6 +2827,15 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
28272827
case Intrinsic::loongarch_lasx_xvsrai_d:
28282828
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
28292829
lowerVectorSplatImm<6>(N, 2, DAG));
2830+
case Intrinsic::loongarch_lsx_vclz_b:
2831+
case Intrinsic::loongarch_lsx_vclz_h:
2832+
case Intrinsic::loongarch_lsx_vclz_w:
2833+
case Intrinsic::loongarch_lsx_vclz_d:
2834+
case Intrinsic::loongarch_lasx_xvclz_b:
2835+
case Intrinsic::loongarch_lasx_xvclz_h:
2836+
case Intrinsic::loongarch_lasx_xvclz_w:
2837+
case Intrinsic::loongarch_lasx_xvclz_d:
2838+
return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
28302839
case Intrinsic::loongarch_lsx_vpcnt_b:
28312840
case Intrinsic::loongarch_lsx_vpcnt_h:
28322841
case Intrinsic::loongarch_lsx_vpcnt_w:

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,6 +1273,9 @@ defm : PatXrXr<sra, "XVSRA">;
12731273
defm : PatShiftXrXr<sra, "XVSRA">;
12741274
defm : PatShiftXrUimm<sra, "XVSRAI">;
12751275

1276+
// XVCLZ_{B/H/W/D}
1277+
defm : PatXr<ctlz, "XVCLZ">;
1278+
12761279
// XVPCNT_{B/H/W/D}
12771280
defm : PatXr<ctpop, "XVPCNT">;
12781281

@@ -1590,26 +1593,26 @@ foreach Inst = ["XVMADDWEV_Q_D", "XVMADDWOD_Q_D", "XVMADDWEV_Q_DU",
15901593
// (LAInst vty:$xj)>;
15911594
foreach Inst = ["XVEXTH_H_B", "XVEXTH_HU_BU",
15921595
"XVMSKLTZ_B", "XVMSKGEZ_B", "XVMSKNZ_B",
1593-
"XVCLO_B", "XVCLZ_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU",
1596+
"XVCLO_B", "VEXT2XV_H_B", "VEXT2XV_HU_BU",
15941597
"VEXT2XV_W_B", "VEXT2XV_WU_BU", "VEXT2XV_D_B",
15951598
"VEXT2XV_DU_BU", "XVREPLVE0_B", "XVREPLVE0_Q"] in
15961599
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v32i8 LASX256:$xj)),
15971600
(!cast<LAInst>(Inst) LASX256:$xj)>;
15981601
foreach Inst = ["XVEXTH_W_H", "XVEXTH_WU_HU", "XVMSKLTZ_H",
1599-
"XVCLO_H", "XVCLZ_H", "XVFCVTL_S_H", "XVFCVTH_S_H",
1602+
"XVCLO_H", "XVFCVTL_S_H", "XVFCVTH_S_H",
16001603
"VEXT2XV_W_H", "VEXT2XV_WU_HU", "VEXT2XV_D_H",
16011604
"VEXT2XV_DU_HU", "XVREPLVE0_H"] in
16021605
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v16i16 LASX256:$xj)),
16031606
(!cast<LAInst>(Inst) LASX256:$xj)>;
16041607
foreach Inst = ["XVEXTH_D_W", "XVEXTH_DU_WU", "XVMSKLTZ_W",
1605-
"XVCLO_W", "XVCLZ_W", "XVFFINT_S_W", "XVFFINT_S_WU",
1608+
"XVCLO_W", "XVFFINT_S_W", "XVFFINT_S_WU",
16061609
"XVFFINTL_D_W", "XVFFINTH_D_W",
16071610
"VEXT2XV_D_W", "VEXT2XV_DU_WU", "XVREPLVE0_W"] in
16081611
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v8i32 LASX256:$xj)),
16091612
(!cast<LAInst>(Inst) LASX256:$xj)>;
16101613
foreach Inst = ["XVEXTH_Q_D", "XVEXTH_QU_DU", "XVMSKLTZ_D",
16111614
"XVEXTL_Q_D", "XVEXTL_QU_DU",
1612-
"XVCLO_D", "XVCLZ_D", "XVFFINT_D_L", "XVFFINT_D_LU",
1615+
"XVCLO_D", "XVFFINT_D_L", "XVFFINT_D_LU",
16131616
"XVREPLVE0_D"] in
16141617
def : Pat<(deriveLASXIntrinsic<Inst>.ret (v4i64 LASX256:$xj)),
16151618
(!cast<LAInst>(Inst) LASX256:$xj)>;

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,9 @@ defm : PatVrVr<sra, "VSRA">;
13501350
defm : PatShiftVrVr<sra, "VSRA">;
13511351
defm : PatShiftVrUimm<sra, "VSRAI">;
13521352

1353+
// VCLZ_{B/H/W/D}
1354+
defm : PatVr<ctlz, "VCLZ">;
1355+
13531356
// VPCNT_{B/H/W/D}
13541357
defm : PatVr<ctpop, "VPCNT">;
13551358

@@ -1674,21 +1677,21 @@ foreach Inst = ["VMADDWEV_Q_D", "VMADDWOD_Q_D", "VMADDWEV_Q_DU",
16741677
// (LAInst vty:$vj)>;
16751678
foreach Inst = ["VEXTH_H_B", "VEXTH_HU_BU",
16761679
"VMSKLTZ_B", "VMSKGEZ_B", "VMSKNZ_B",
1677-
"VCLO_B", "VCLZ_B"] in
1680+
"VCLO_B"] in
16781681
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v16i8 LSX128:$vj)),
16791682
(!cast<LAInst>(Inst) LSX128:$vj)>;
16801683
foreach Inst = ["VEXTH_W_H", "VEXTH_WU_HU", "VMSKLTZ_H",
1681-
"VCLO_H", "VCLZ_H", "VFCVTL_S_H", "VFCVTH_S_H"] in
1684+
"VCLO_H", "VFCVTL_S_H", "VFCVTH_S_H"] in
16821685
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v8i16 LSX128:$vj)),
16831686
(!cast<LAInst>(Inst) LSX128:$vj)>;
16841687
foreach Inst = ["VEXTH_D_W", "VEXTH_DU_WU", "VMSKLTZ_W",
1685-
"VCLO_W", "VCLZ_W", "VFFINT_S_W", "VFFINT_S_WU",
1688+
"VCLO_W", "VFFINT_S_W", "VFFINT_S_WU",
16861689
"VFFINTL_D_W", "VFFINTH_D_W"] in
16871690
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v4i32 LSX128:$vj)),
16881691
(!cast<LAInst>(Inst) LSX128:$vj)>;
16891692
foreach Inst = ["VEXTH_Q_D", "VEXTH_QU_DU", "VMSKLTZ_D",
16901693
"VEXTL_Q_D", "VEXTL_QU_DU",
1691-
"VCLO_D", "VCLZ_D", "VFFINT_D_L", "VFFINT_D_LU"] in
1694+
"VCLO_D", "VFFINT_D_L", "VFFINT_D_LU"] in
16921695
def : Pat<(deriveLSXIntrinsic<Inst>.ret (v2i64 LSX128:$vj)),
16931696
(!cast<LAInst>(Inst) LSX128:$vj)>;
16941697

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
define void @ctpop_v32i8(ptr %src, ptr %dst) nounwind {
5+
; CHECK-LABEL: ctpop_v32i8:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: xvld $xr0, $a0, 0
8+
; CHECK-NEXT: xvpcnt.b $xr0, $xr0
9+
; CHECK-NEXT: xvst $xr0, $a1, 0
10+
; CHECK-NEXT: ret
11+
%v = load <32 x i8>, ptr %src
12+
%res = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %v)
13+
store <32 x i8> %res, ptr %dst
14+
ret void
15+
}
16+
17+
define void @ctpop_v16i16(ptr %src, ptr %dst) nounwind {
18+
; CHECK-LABEL: ctpop_v16i16:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: xvld $xr0, $a0, 0
21+
; CHECK-NEXT: xvpcnt.h $xr0, $xr0
22+
; CHECK-NEXT: xvst $xr0, $a1, 0
23+
; CHECK-NEXT: ret
24+
%v = load <16 x i16>, ptr %src
25+
%res = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %v)
26+
store <16 x i16> %res, ptr %dst
27+
ret void
28+
}
29+
30+
define void @ctpop_v8i32(ptr %src, ptr %dst) nounwind {
31+
; CHECK-LABEL: ctpop_v8i32:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: xvld $xr0, $a0, 0
34+
; CHECK-NEXT: xvpcnt.w $xr0, $xr0
35+
; CHECK-NEXT: xvst $xr0, $a1, 0
36+
; CHECK-NEXT: ret
37+
%v = load <8 x i32>, ptr %src
38+
%res = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %v)
39+
store <8 x i32> %res, ptr %dst
40+
ret void
41+
}
42+
43+
define void @ctpop_v4i64(ptr %src, ptr %dst) nounwind {
44+
; CHECK-LABEL: ctpop_v4i64:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: xvld $xr0, $a0, 0
47+
; CHECK-NEXT: xvpcnt.d $xr0, $xr0
48+
; CHECK-NEXT: xvst $xr0, $a1, 0
49+
; CHECK-NEXT: ret
50+
%v = load <4 x i64>, ptr %src
51+
%res = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %v)
52+
store <4 x i64> %res, ptr %dst
53+
ret void
54+
}
55+
56+
define void @ctlz_v32i8(ptr %src, ptr %dst) nounwind {
57+
; CHECK-LABEL: ctlz_v32i8:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: xvld $xr0, $a0, 0
60+
; CHECK-NEXT: xvclz.b $xr0, $xr0
61+
; CHECK-NEXT: xvst $xr0, $a1, 0
62+
; CHECK-NEXT: ret
63+
%v = load <32 x i8>, ptr %src
64+
%res = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %v, i1 false)
65+
store <32 x i8> %res, ptr %dst
66+
ret void
67+
}
68+
69+
define void @ctlz_v16i16(ptr %src, ptr %dst) nounwind {
70+
; CHECK-LABEL: ctlz_v16i16:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: xvld $xr0, $a0, 0
73+
; CHECK-NEXT: xvclz.h $xr0, $xr0
74+
; CHECK-NEXT: xvst $xr0, $a1, 0
75+
; CHECK-NEXT: ret
76+
%v = load <16 x i16>, ptr %src
77+
%res = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %v, i1 false)
78+
store <16 x i16> %res, ptr %dst
79+
ret void
80+
}
81+
82+
define void @ctlz_v8i32(ptr %src, ptr %dst) nounwind {
83+
; CHECK-LABEL: ctlz_v8i32:
84+
; CHECK: # %bb.0:
85+
; CHECK-NEXT: xvld $xr0, $a0, 0
86+
; CHECK-NEXT: xvclz.w $xr0, $xr0
87+
; CHECK-NEXT: xvst $xr0, $a1, 0
88+
; CHECK-NEXT: ret
89+
%v = load <8 x i32>, ptr %src
90+
%res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %v, i1 false)
91+
store <8 x i32> %res, ptr %dst
92+
ret void
93+
}
94+
95+
define void @ctlz_v4i64(ptr %src, ptr %dst) nounwind {
96+
; CHECK-LABEL: ctlz_v4i64:
97+
; CHECK: # %bb.0:
98+
; CHECK-NEXT: xvld $xr0, $a0, 0
99+
; CHECK-NEXT: xvclz.d $xr0, $xr0
100+
; CHECK-NEXT: xvst $xr0, $a1, 0
101+
; CHECK-NEXT: ret
102+
%v = load <4 x i64>, ptr %src
103+
%res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %v, i1 false)
104+
store <4 x i64> %res, ptr %dst
105+
ret void
106+
}
107+
108+
declare <32 x i8> @llvm.ctpop.v32i8(<32 x i8>)
109+
declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>)
110+
declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>)
111+
declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>)
112+
declare <32 x i8> @llvm.ctlz.v32i8(<32 x i8>, i1)
113+
declare <16 x i16> @llvm.ctlz.v16i16(<16 x i16>, i1)
114+
declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1)
115+
declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1)
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
define void @ctpop_v16i8(ptr %src, ptr %dst) nounwind {
5+
; CHECK-LABEL: ctpop_v16i8:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vld $vr0, $a0, 0
8+
; CHECK-NEXT: vpcnt.b $vr0, $vr0
9+
; CHECK-NEXT: vst $vr0, $a1, 0
10+
; CHECK-NEXT: ret
11+
%v = load <16 x i8>, ptr %src
12+
%res = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %v)
13+
store <16 x i8> %res, ptr %dst
14+
ret void
15+
}
16+
17+
define void @ctpop_v8i16(ptr %src, ptr %dst) nounwind {
18+
; CHECK-LABEL: ctpop_v8i16:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: vld $vr0, $a0, 0
21+
; CHECK-NEXT: vpcnt.h $vr0, $vr0
22+
; CHECK-NEXT: vst $vr0, $a1, 0
23+
; CHECK-NEXT: ret
24+
%v = load <8 x i16>, ptr %src
25+
%res = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %v)
26+
store <8 x i16> %res, ptr %dst
27+
ret void
28+
}
29+
30+
define void @ctpop_v4i32(ptr %src, ptr %dst) nounwind {
31+
; CHECK-LABEL: ctpop_v4i32:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: vld $vr0, $a0, 0
34+
; CHECK-NEXT: vpcnt.w $vr0, $vr0
35+
; CHECK-NEXT: vst $vr0, $a1, 0
36+
; CHECK-NEXT: ret
37+
%v = load <4 x i32>, ptr %src
38+
%res = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %v)
39+
store <4 x i32> %res, ptr %dst
40+
ret void
41+
}
42+
43+
define void @ctpop_v2i64(ptr %src, ptr %dst) nounwind {
44+
; CHECK-LABEL: ctpop_v2i64:
45+
; CHECK: # %bb.0:
46+
; CHECK-NEXT: vld $vr0, $a0, 0
47+
; CHECK-NEXT: vpcnt.d $vr0, $vr0
48+
; CHECK-NEXT: vst $vr0, $a1, 0
49+
; CHECK-NEXT: ret
50+
%v = load <2 x i64>, ptr %src
51+
%res = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %v)
52+
store <2 x i64> %res, ptr %dst
53+
ret void
54+
}
55+
56+
define void @ctlz_v16i8(ptr %src, ptr %dst) nounwind {
57+
; CHECK-LABEL: ctlz_v16i8:
58+
; CHECK: # %bb.0:
59+
; CHECK-NEXT: vld $vr0, $a0, 0
60+
; CHECK-NEXT: vclz.b $vr0, $vr0
61+
; CHECK-NEXT: vst $vr0, $a1, 0
62+
; CHECK-NEXT: ret
63+
%v = load <16 x i8>, ptr %src
64+
%res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %v, i1 false)
65+
store <16 x i8> %res, ptr %dst
66+
ret void
67+
}
68+
69+
define void @ctlz_v8i16(ptr %src, ptr %dst) nounwind {
70+
; CHECK-LABEL: ctlz_v8i16:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: vld $vr0, $a0, 0
73+
; CHECK-NEXT: vclz.h $vr0, $vr0
74+
; CHECK-NEXT: vst $vr0, $a1, 0
75+
; CHECK-NEXT: ret
76+
%v = load <8 x i16>, ptr %src
77+
%res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %v, i1 false)
78+
store <8 x i16> %res, ptr %dst
79+
ret void
80+
}
81+
82+
define void @ctlz_v4i32(ptr %src, ptr %dst) nounwind {
83+
; CHECK-LABEL: ctlz_v4i32:
84+
; CHECK: # %bb.0:
85+
; CHECK-NEXT: vld $vr0, $a0, 0
86+
; CHECK-NEXT: vclz.w $vr0, $vr0
87+
; CHECK-NEXT: vst $vr0, $a1, 0
88+
; CHECK-NEXT: ret
89+
%v = load <4 x i32>, ptr %src
90+
%res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %v, i1 false)
91+
store <4 x i32> %res, ptr %dst
92+
ret void
93+
}
94+
95+
define void @ctlz_v2i64(ptr %src, ptr %dst) nounwind {
96+
; CHECK-LABEL: ctlz_v2i64:
97+
; CHECK: # %bb.0:
98+
; CHECK-NEXT: vld $vr0, $a0, 0
99+
; CHECK-NEXT: vclz.d $vr0, $vr0
100+
; CHECK-NEXT: vst $vr0, $a1, 0
101+
; CHECK-NEXT: ret
102+
%v = load <2 x i64>, ptr %src
103+
%res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %v, i1 false)
104+
store <2 x i64> %res, ptr %dst
105+
ret void
106+
}
107+
108+
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)
109+
declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>)
110+
declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>)
111+
declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>)
112+
declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8>, i1)
113+
declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16>, i1)
114+
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
115+
declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1)

0 commit comments

Comments
 (0)