Skip to content

Commit de21308

Browse files
committed
[LoongArch] Make ISD::VSELECT a legal operation with lsx/lasx
1 parent 2ea60f4 commit de21308

File tree

5 files changed

+191
-0
lines changed

5 files changed

+191
-0
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -246,6 +246,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
246246
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
247247
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
248248
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
249+
250+
setOperationAction(ISD::VSELECT, VT, Legal);
249251
}
250252
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
251253
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -277,6 +279,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
277279
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
278280
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
279281
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
282+
283+
setOperationAction(ISD::VSELECT, VT, Legal);
280284
}
281285
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
282286
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
@@ -314,6 +318,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
314318
setStackPointerRegisterToSaveRestore(LoongArch::R3);
315319

316320
setBooleanContents(ZeroOrOneBooleanContent);
321+
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
317322

318323
setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
319324

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1480,6 +1480,14 @@ def : Pat<(f32 (vector_extract v8f32:$xj, i64:$rk)),
14801480
def : Pat<(f64 (vector_extract v4f64:$xj, i64:$rk)),
14811481
(f64 (EXTRACT_SUBREG (XVREPLVE_D v4f64:$xj, i64:$rk), sub_64))>;
14821482

1483+
// vselect
// These patterns rely on vector booleans being all-zeros/all-ones per lane
// (ZeroOrNegativeOneBooleanContent), so a lane select is a bitwise select.
// xvbitseli.b xd, xj, imm computes xd = (~xd & xj) | (xd & imm): the tied
// destination $xd is the mask, imm is taken where the mask is set and $xj
// elsewhere. The vselect condition must therefore bind to $xd, with the
// splatted immediate as the true value and $xj as the false value.
// xvbitsel.v xd, xj, xk, xa computes xd = (~xa & xj) | (xa & xk): $xa is the
// condition, $xk the true value and $xj the false value.
1484+
def : Pat<(v32i8 (vselect LASX256:$xd,
1485+
(v32i8 (SplatPat_uimm8 uimm8:$imm)), LASX256:$xj)),
1486+
(XVBITSELI_B LASX256:$xd, LASX256:$xj, uimm8:$imm)>;
1487+
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in
1488+
def : Pat<(vt (vselect LASX256:$xa, LASX256:$xk, LASX256:$xj)),
1489+
(XVBITSEL_V LASX256:$xj, LASX256:$xk, LASX256:$xa)>;
1490+
14831491
} // Predicates = [HasExtLASX]
14841492

14851493
/// Intrinsic pattern

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1564,6 +1564,14 @@ def : Pat<(f32 (vector_extract v4f32:$vj, i64:$rk)),
15641564
def : Pat<(f64 (vector_extract v2f64:$vj, i64:$rk)),
15651565
(f64 (EXTRACT_SUBREG (VREPLVE_D v2f64:$vj, i64:$rk), sub_64))>;
15661566

1567+
// vselect
// These patterns rely on vector booleans being all-zeros/all-ones per lane
// (ZeroOrNegativeOneBooleanContent), so a lane select is a bitwise select.
// vbitseli.b vd, vj, imm computes vd = (~vd & vj) | (vd & imm): the tied
// destination $vd is the mask, imm is taken where the mask is set and $vj
// elsewhere. The vselect condition must therefore bind to $vd, with the
// splatted immediate as the true value and $vj as the false value.
// vbitsel.v vd, vj, vk, va computes vd = (~va & vj) | (va & vk): $va is the
// condition, $vk the true value and $vj the false value.
1568+
def : Pat<(v16i8 (vselect LSX128:$vd,
1569+
(v16i8 (SplatPat_uimm8 uimm8:$imm)), LSX128:$vj)),
1570+
(VBITSELI_B LSX128:$vd, LSX128:$vj, uimm8:$imm)>;
1571+
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in
1572+
def : Pat<(vt (vselect LSX128:$va, LSX128:$vk, LSX128:$vj)),
1573+
(VBITSEL_V LSX128:$vj, LSX128:$vk, LSX128:$va)>;
1574+
15671575
} // Predicates = [HasExtLSX]
15681576

15691577
/// Intrinsic pattern
Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
3+
4+
; Per-lane select on <32 x i8> with a constant alternating mask: lanes whose
; mask bit is true take the loaded vector %v0, the others take the splat
; constant 1. The assertions expect the immediate form xvbitseli.b.
; NOTE(review): the expected output makes the loaded data ($xr0) the destination
; operand of xvbitseli.b and the xvrepli.h -256 splat (presumably the mask,
; bytes 0x00/0xFF) the source operand. If xvbitseli.b treats its destination as
; the selection mask, as the ISA reference appears to describe, this sequence
; does not implement the select below - confirm and regenerate.
define void @select_v32i8_imm(ptr %res, ptr %a0) nounwind {
5+
; CHECK-LABEL: select_v32i8_imm:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: xvld $xr0, $a1, 0
8+
; CHECK-NEXT: xvrepli.h $xr1, -256
9+
; CHECK-NEXT: xvbitseli.b $xr0, $xr1, 1
10+
; CHECK-NEXT: xvst $xr0, $a0, 0
11+
; CHECK-NEXT: ret
12+
%v0 = load <32 x i8>, ptr %a0
13+
%sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
14+
store <32 x i8> %sel, ptr %res
15+
ret void
16+
}
17+
18+
; Per-lane select between two loaded <32 x i8> vectors under a constant
; alternating mask. The assertions expect the mask to be materialized with
; xvrepli.h and the select to be a single xvbitsel.v whose last operand is the
; mask register.
define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind {
19+
; CHECK-LABEL: select_v32i8:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: xvld $xr0, $a1, 0
22+
; CHECK-NEXT: xvld $xr1, $a2, 0
23+
; CHECK-NEXT: xvrepli.h $xr2, -256
24+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
25+
; CHECK-NEXT: xvst $xr0, $a0, 0
26+
; CHECK-NEXT: ret
27+
%v0 = load <32 x i8>, ptr %a0
28+
%v1 = load <32 x i8>, ptr %a1
29+
%sel = select <32 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1
30+
store <32 x i8> %sel, ptr %res
31+
ret void
32+
}
33+
34+
; Per-lane select between two loaded <16 x i16> vectors under a constant
; alternating mask. The assertions expect the mask to be built in a GPR
; (lu12i.w) and broadcast with xvreplgr2vr.w, followed by a single xvbitsel.v.
define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind {
35+
; CHECK-LABEL: select_v16i16:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: lu12i.w $a3, -16
38+
; CHECK-NEXT: xvreplgr2vr.w $xr0, $a3
39+
; CHECK-NEXT: xvld $xr1, $a1, 0
40+
; CHECK-NEXT: xvld $xr2, $a2, 0
41+
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
42+
; CHECK-NEXT: xvst $xr0, $a0, 0
43+
; CHECK-NEXT: ret
44+
%v0 = load <16 x i16>, ptr %a0
45+
%v1 = load <16 x i16>, ptr %a1
46+
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i16> %v0, <16 x i16> %v1
47+
store <16 x i16> %sel, ptr %res
48+
ret void
49+
}
50+
51+
; Per-lane select between two loaded <8 x i32> vectors under a constant
; alternating mask. The assertions expect the mask to be built in a GPR
; (ori + lu32i.d) and broadcast with xvreplgr2vr.d, followed by a single
; xvbitsel.v.
define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind {
52+
; CHECK-LABEL: select_v8i32:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: xvld $xr0, $a1, 0
55+
; CHECK-NEXT: xvld $xr1, $a2, 0
56+
; CHECK-NEXT: ori $a1, $zero, 0
57+
; CHECK-NEXT: lu32i.d $a1, -1
58+
; CHECK-NEXT: xvreplgr2vr.d $xr2, $a1
59+
; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2
60+
; CHECK-NEXT: xvst $xr0, $a0, 0
61+
; CHECK-NEXT: ret
62+
%v0 = load <8 x i32>, ptr %a0
63+
%v1 = load <8 x i32>, ptr %a1
64+
%sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i32> %v0, <8 x i32> %v1
65+
store <8 x i32> %sel, ptr %res
66+
ret void
67+
}
68+
69+
; Per-lane select between two loaded <4 x i64> vectors under the constant mask
; <true, false, true, false>. The assertions expect the mask to be loaded from
; a constant-pool entry (.LCPI4_0) and the select to be a single xvbitsel.v.
define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind {
70+
; CHECK-LABEL: select_v4i64:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
73+
; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
74+
; CHECK-NEXT: xvld $xr0, $a3, 0
75+
; CHECK-NEXT: xvld $xr1, $a1, 0
76+
; CHECK-NEXT: xvld $xr2, $a2, 0
77+
; CHECK-NEXT: xvbitsel.v $xr0, $xr2, $xr1, $xr0
78+
; CHECK-NEXT: xvst $xr0, $a0, 0
79+
; CHECK-NEXT: ret
80+
%v0 = load <4 x i64>, ptr %a0
81+
%v1 = load <4 x i64>, ptr %a1
82+
%sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1
83+
store <4 x i64> %sel, ptr %res
84+
ret void
85+
}
Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
3+
4+
; Per-lane select on <16 x i8> with a constant alternating mask: lanes whose
; mask bit is true take the loaded vector %v0, the others take the splat
; constant -1 (encoded as immediate 255). The assertions expect the immediate
; form vbitseli.b.
; NOTE(review): the expected output makes the loaded data ($vr0) the destination
; operand of vbitseli.b and the vrepli.h -256 splat (presumably the mask, bytes
; 0x00/0xFF) the source operand. If vbitseli.b treats its destination as the
; selection mask, as the ISA reference appears to describe, this sequence does
; not implement the select below - confirm and regenerate.
define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind {
5+
; CHECK-LABEL: select_v16i8_imm:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vld $vr0, $a1, 0
8+
; CHECK-NEXT: vrepli.h $vr1, -256
9+
; CHECK-NEXT: vbitseli.b $vr0, $vr1, 255
10+
; CHECK-NEXT: vst $vr0, $a0, 0
11+
; CHECK-NEXT: ret
12+
%v0 = load <16 x i8>, ptr %a0
13+
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
14+
store <16 x i8> %sel, ptr %res
15+
ret void
16+
}
17+
18+
; Per-lane select between two loaded <16 x i8> vectors under a constant
; alternating mask. The assertions expect the mask to be materialized with
; vrepli.h and the select to be a single vbitsel.v whose last operand is the
; mask register.
define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind {
19+
; CHECK-LABEL: select_v16i8:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vld $vr0, $a1, 0
22+
; CHECK-NEXT: vld $vr1, $a2, 0
23+
; CHECK-NEXT: vrepli.h $vr2, -256
24+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
25+
; CHECK-NEXT: vst $vr0, $a0, 0
26+
; CHECK-NEXT: ret
27+
%v0 = load <16 x i8>, ptr %a0
28+
%v1 = load <16 x i8>, ptr %a1
29+
%sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> %v0, <16 x i8> %v1
30+
store <16 x i8> %sel, ptr %res
31+
ret void
32+
}
33+
34+
; Per-lane select between two loaded <8 x i16> vectors under a constant
; alternating mask. The assertions expect the mask to be built in a GPR
; (lu12i.w) and broadcast with vreplgr2vr.w, followed by a single vbitsel.v.
define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind {
35+
; CHECK-LABEL: select_v8i16:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: lu12i.w $a3, -16
38+
; CHECK-NEXT: vreplgr2vr.w $vr0, $a3
39+
; CHECK-NEXT: vld $vr1, $a1, 0
40+
; CHECK-NEXT: vld $vr2, $a2, 0
41+
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
42+
; CHECK-NEXT: vst $vr0, $a0, 0
43+
; CHECK-NEXT: ret
44+
%v0 = load <8 x i16>, ptr %a0
45+
%v1 = load <8 x i16>, ptr %a1
46+
%sel = select <8 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <8 x i16> %v0, <8 x i16> %v1
47+
store <8 x i16> %sel, ptr %res
48+
ret void
49+
}
50+
51+
; Per-lane select between two loaded <4 x i32> vectors under a constant
; alternating mask. The assertions expect the mask to be built in a GPR
; (ori + lu32i.d) and broadcast with vreplgr2vr.d, followed by a single
; vbitsel.v.
define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind {
52+
; CHECK-LABEL: select_v4i32:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vld $vr0, $a1, 0
55+
; CHECK-NEXT: vld $vr1, $a2, 0
56+
; CHECK-NEXT: ori $a1, $zero, 0
57+
; CHECK-NEXT: lu32i.d $a1, -1
58+
; CHECK-NEXT: vreplgr2vr.d $vr2, $a1
59+
; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2
60+
; CHECK-NEXT: vst $vr0, $a0, 0
61+
; CHECK-NEXT: ret
62+
%v0 = load <4 x i32>, ptr %a0
63+
%v1 = load <4 x i32>, ptr %a1
64+
%sel = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> %v0, <4 x i32> %v1
65+
store <4 x i32> %sel, ptr %res
66+
ret void
67+
}
68+
69+
; Per-lane select between two loaded <2 x i64> vectors under the constant mask
; <true, false>. The assertions expect the mask to be loaded from a
; constant-pool entry (.LCPI4_0) and the select to be a single vbitsel.v.
define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind {
70+
; CHECK-LABEL: select_v2i64:
71+
; CHECK: # %bb.0:
72+
; CHECK-NEXT: pcalau12i $a3, %pc_hi20(.LCPI4_0)
73+
; CHECK-NEXT: addi.d $a3, $a3, %pc_lo12(.LCPI4_0)
74+
; CHECK-NEXT: vld $vr0, $a3, 0
75+
; CHECK-NEXT: vld $vr1, $a1, 0
76+
; CHECK-NEXT: vld $vr2, $a2, 0
77+
; CHECK-NEXT: vbitsel.v $vr0, $vr2, $vr1, $vr0
78+
; CHECK-NEXT: vst $vr0, $a0, 0
79+
; CHECK-NEXT: ret
80+
%v0 = load <2 x i64>, ptr %a0
81+
%v1 = load <2 x i64>, ptr %a1
82+
%sel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v0, <2 x i64> %v1
83+
store <2 x i64> %sel, ptr %res
84+
ret void
85+
}

0 commit comments

Comments
 (0)