Skip to content

Commit f246b5f

Browse files
authored
[LoongArch] Support bswap for LSX/LASX VTs (#114171)
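This change builds on top of #114170. It marks ISD::BSWAP as Legal for the LSX (v8i16, v4i32, v2i64) and LASX (v16i16, v8i32, v4i64) integer vector types and selects it to [x]vshuf4i.b / [x]vshuf4i.w shuffles, replacing the generic shift-and-mask expansion previously seen in the tests.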
1 parent 8129b6b commit f246b5f

File tree

5 files changed

+24
-88
lines changed

5 files changed

+24
-88
lines changed

llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
269269
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
270270
Expand);
271271
}
272+
for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
273+
setOperationAction(ISD::BSWAP, VT, Legal);
272274
for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
273275
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
274276
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
@@ -317,6 +319,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
317319
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
318320
Expand);
319321
}
322+
for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64})
323+
setOperationAction(ISD::BSWAP, VT, Legal);
320324
for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
321325
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
322326
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);

llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,12 @@ def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
14441444
def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
14451445
(XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
14461446

1447+
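// Vector bswaps. XVSHUF4I_B picks bytes within each 4-byte group, 2 immediate bits per result byte: 0b10110001 swaps adjacent bytes (i16), 0b00011011 reverses all four (i32); v4i64 then swaps the two words of each doubleword with XVSHUF4I_W 0b10110001.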
1448+
def : Pat<(bswap (v16i16 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b10110001)>;
1449+
def : Pat<(bswap (v8i32 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b00011011)>;
1450+
def : Pat<(bswap (v4i64 LASX256:$xj)),
1451+
(XVSHUF4I_W (XVSHUF4I_B LASX256:$xj, 0b00011011), 0b10110001)>;
1452+
14471453
// XVFADD_{S/D}
14481454
defm : PatXrXrF<fadd, "XVFADD">;
14491455

llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,6 +1600,12 @@ def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
16001600
def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
16011601
(VBITREVI_D LSX128:$vj, uimm6:$imm)>;
16021602

1603+
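// Vector bswaps. VSHUF4I_B picks bytes within each 4-byte group, 2 immediate bits per result byte: 0b10110001 swaps adjacent bytes (i16), 0b00011011 reverses all four (i32); v2i64 then swaps the two words of each doubleword with VSHUF4I_W 0b10110001.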
1604+
def : Pat<(bswap (v8i16 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b10110001)>;
1605+
def : Pat<(bswap (v4i32 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b00011011)>;
1606+
def : Pat<(bswap (v2i64 LSX128:$vj)),
1607+
(VSHUF4I_W (VSHUF4I_B LSX128:$vj, 0b00011011), 0b10110001)>;
1608+
16031609
// VFADD_{S/D}
16041610
defm : PatVrVrF<fadd, "VFADD">;
16051611

llvm/test/CodeGen/LoongArch/lasx/bswap.ll

Lines changed: 4 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ define void @bswap_v16i16(ptr %src, ptr %dst) nounwind {
55
; CHECK-LABEL: bswap_v16i16:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: xvld $xr0, $a0, 0
8-
; CHECK-NEXT: xvsrli.h $xr1, $xr0, 8
9-
; CHECK-NEXT: xvslli.h $xr0, $xr0, 8
10-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
8+
; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177
119
; CHECK-NEXT: xvst $xr0, $a1, 0
1210
; CHECK-NEXT: ret
1311
%v = load <16 x i16>, ptr %src
@@ -20,18 +18,7 @@ define void @bswap_v8i32(ptr %src, ptr %dst) nounwind {
2018
; CHECK-LABEL: bswap_v8i32:
2119
; CHECK: # %bb.0:
2220
; CHECK-NEXT: xvld $xr0, $a0, 0
23-
; CHECK-NEXT: lu12i.w $a0, 15
24-
; CHECK-NEXT: ori $a0, $a0, 3840
25-
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a0
26-
; CHECK-NEXT: xvsrli.w $xr2, $xr0, 8
27-
; CHECK-NEXT: xvand.v $xr2, $xr2, $xr1
28-
; CHECK-NEXT: xvsrli.w $xr3, $xr0, 24
29-
; CHECK-NEXT: xvor.v $xr2, $xr2, $xr3
30-
; CHECK-NEXT: xvand.v $xr1, $xr0, $xr1
31-
; CHECK-NEXT: xvslli.w $xr1, $xr1, 8
32-
; CHECK-NEXT: xvslli.w $xr0, $xr0, 24
33-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
34-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
21+
; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
3522
; CHECK-NEXT: xvst $xr0, $a1, 0
3623
; CHECK-NEXT: ret
3724
%v = load <8 x i32>, ptr %src
@@ -44,35 +31,8 @@ define void @bswap_v4i64(ptr %src, ptr %dst) nounwind {
4431
; CHECK-LABEL: bswap_v4i64:
4532
; CHECK: # %bb.0:
4633
; CHECK-NEXT: xvld $xr0, $a0, 0
47-
; CHECK-NEXT: lu12i.w $a0, 4080
48-
; CHECK-NEXT: xvreplgr2vr.d $xr1, $a0
49-
; CHECK-NEXT: xvsrli.d $xr2, $xr0, 24
50-
; CHECK-NEXT: xvand.v $xr2, $xr2, $xr1
51-
; CHECK-NEXT: lu12i.w $a0, -4096
52-
; CHECK-NEXT: lu32i.d $a0, 0
53-
; CHECK-NEXT: xvreplgr2vr.d $xr3, $a0
54-
; CHECK-NEXT: xvsrli.d $xr4, $xr0, 8
55-
; CHECK-NEXT: xvand.v $xr4, $xr4, $xr3
56-
; CHECK-NEXT: xvor.v $xr2, $xr4, $xr2
57-
; CHECK-NEXT: lu12i.w $a0, 15
58-
; CHECK-NEXT: ori $a0, $a0, 3840
59-
; CHECK-NEXT: xvreplgr2vr.d $xr4, $a0
60-
; CHECK-NEXT: xvsrli.d $xr5, $xr0, 40
61-
; CHECK-NEXT: xvand.v $xr5, $xr5, $xr4
62-
; CHECK-NEXT: xvsrli.d $xr6, $xr0, 56
63-
; CHECK-NEXT: xvor.v $xr5, $xr5, $xr6
64-
; CHECK-NEXT: xvor.v $xr2, $xr2, $xr5
65-
; CHECK-NEXT: xvand.v $xr1, $xr0, $xr1
66-
; CHECK-NEXT: xvslli.d $xr1, $xr1, 24
67-
; CHECK-NEXT: xvand.v $xr3, $xr0, $xr3
68-
; CHECK-NEXT: xvslli.d $xr3, $xr3, 8
69-
; CHECK-NEXT: xvor.v $xr1, $xr1, $xr3
70-
; CHECK-NEXT: xvand.v $xr3, $xr0, $xr4
71-
; CHECK-NEXT: xvslli.d $xr3, $xr3, 40
72-
; CHECK-NEXT: xvslli.d $xr0, $xr0, 56
73-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr3
74-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
75-
; CHECK-NEXT: xvor.v $xr0, $xr0, $xr2
34+
; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
35+
; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 177
7636
; CHECK-NEXT: xvst $xr0, $a1, 0
7737
; CHECK-NEXT: ret
7838
%v = load <4 x i64>, ptr %src

llvm/test/CodeGen/LoongArch/lsx/bswap.ll

Lines changed: 4 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,7 @@ define void @bswap_v8i16(ptr %src, ptr %dst) nounwind {
55
; CHECK-LABEL: bswap_v8i16:
66
; CHECK: # %bb.0:
77
; CHECK-NEXT: vld $vr0, $a0, 0
8-
; CHECK-NEXT: vsrli.h $vr1, $vr0, 8
9-
; CHECK-NEXT: vslli.h $vr0, $vr0, 8
10-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
8+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177
119
; CHECK-NEXT: vst $vr0, $a1, 0
1210
; CHECK-NEXT: ret
1311
%v = load <8 x i16>, ptr %src
@@ -20,18 +18,7 @@ define void @bswap_v4i32(ptr %src, ptr %dst) nounwind {
2018
; CHECK-LABEL: bswap_v4i32:
2119
; CHECK: # %bb.0:
2220
; CHECK-NEXT: vld $vr0, $a0, 0
23-
; CHECK-NEXT: lu12i.w $a0, 15
24-
; CHECK-NEXT: ori $a0, $a0, 3840
25-
; CHECK-NEXT: vreplgr2vr.w $vr1, $a0
26-
; CHECK-NEXT: vsrli.w $vr2, $vr0, 8
27-
; CHECK-NEXT: vand.v $vr2, $vr2, $vr1
28-
; CHECK-NEXT: vsrli.w $vr3, $vr0, 24
29-
; CHECK-NEXT: vor.v $vr2, $vr2, $vr3
30-
; CHECK-NEXT: vand.v $vr1, $vr0, $vr1
31-
; CHECK-NEXT: vslli.w $vr1, $vr1, 8
32-
; CHECK-NEXT: vslli.w $vr0, $vr0, 24
33-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
34-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr2
21+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
3522
; CHECK-NEXT: vst $vr0, $a1, 0
3623
; CHECK-NEXT: ret
3724
%v = load <4 x i32>, ptr %src
@@ -44,35 +31,8 @@ define void @bswap_v2i64(ptr %src, ptr %dst) nounwind {
4431
; CHECK-LABEL: bswap_v2i64:
4532
; CHECK: # %bb.0:
4633
; CHECK-NEXT: vld $vr0, $a0, 0
47-
; CHECK-NEXT: lu12i.w $a0, 4080
48-
; CHECK-NEXT: vreplgr2vr.d $vr1, $a0
49-
; CHECK-NEXT: vsrli.d $vr2, $vr0, 24
50-
; CHECK-NEXT: vand.v $vr2, $vr2, $vr1
51-
; CHECK-NEXT: lu12i.w $a0, -4096
52-
; CHECK-NEXT: lu32i.d $a0, 0
53-
; CHECK-NEXT: vreplgr2vr.d $vr3, $a0
54-
; CHECK-NEXT: vsrli.d $vr4, $vr0, 8
55-
; CHECK-NEXT: vand.v $vr4, $vr4, $vr3
56-
; CHECK-NEXT: vor.v $vr2, $vr4, $vr2
57-
; CHECK-NEXT: lu12i.w $a0, 15
58-
; CHECK-NEXT: ori $a0, $a0, 3840
59-
; CHECK-NEXT: vreplgr2vr.d $vr4, $a0
60-
; CHECK-NEXT: vsrli.d $vr5, $vr0, 40
61-
; CHECK-NEXT: vand.v $vr5, $vr5, $vr4
62-
; CHECK-NEXT: vsrli.d $vr6, $vr0, 56
63-
; CHECK-NEXT: vor.v $vr5, $vr5, $vr6
64-
; CHECK-NEXT: vor.v $vr2, $vr2, $vr5
65-
; CHECK-NEXT: vand.v $vr1, $vr0, $vr1
66-
; CHECK-NEXT: vslli.d $vr1, $vr1, 24
67-
; CHECK-NEXT: vand.v $vr3, $vr0, $vr3
68-
; CHECK-NEXT: vslli.d $vr3, $vr3, 8
69-
; CHECK-NEXT: vor.v $vr1, $vr1, $vr3
70-
; CHECK-NEXT: vand.v $vr3, $vr0, $vr4
71-
; CHECK-NEXT: vslli.d $vr3, $vr3, 40
72-
; CHECK-NEXT: vslli.d $vr0, $vr0, 56
73-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr3
74-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
75-
; CHECK-NEXT: vor.v $vr0, $vr0, $vr2
34+
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
35+
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 177
7636
; CHECK-NEXT: vst $vr0, $a1, 0
7737
; CHECK-NEXT: ret
7838
%v = load <2 x i64>, ptr %src

0 commit comments

Comments
 (0)