-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[LoongArch] Support bswap for LSX/LASX VTs #114171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-loongarch Author: WÁNG Xuěruì (xen0n) Changes: On top of #114170. Full diff: https://github.com/llvm/llvm-project/pull/114171.diff 5 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e2c644a56c95b0..f7d0da265d9b94 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -269,6 +269,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
Expand);
}
+ for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::BSWAP, VT, Legal);
+ }
for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
@@ -317,6 +320,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
Expand);
}
+ for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+ setOperationAction(ISD::BSWAP, VT, Legal);
+ }
for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d13cc9af135b57..3e39e2c10a617a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1444,6 +1444,12 @@ def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
(XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
+// Vector bswaps
+def : Pat<(bswap (v16i16 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b10110001)>;
+def : Pat<(bswap (v8i32 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b00011011)>;
+def : Pat<(bswap (v4i64 LASX256:$xj)),
+ (XVSHUF4I_W (XVSHUF4I_B LASX256:$xj, 0b00011011), 0b10110001)>;
+
// XVFADD_{S/D}
defm : PatXrXrF<fadd, "XVFADD">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 86aa6dcfd8261f..525d2802daa235 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1600,6 +1600,12 @@ def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
(VBITREVI_D LSX128:$vj, uimm6:$imm)>;
+// Vector bswaps
+def : Pat<(bswap (v8i16 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b10110001)>;
+def : Pat<(bswap (v4i32 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b00011011)>;
+def : Pat<(bswap (v2i64 LSX128:$vj)),
+ (VSHUF4I_W (VSHUF4I_B LSX128:$vj, 0b00011011), 0b10110001)>;
+
// VFADD_{S/D}
defm : PatVrVrF<fadd, "VFADD">;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
new file mode 100644
index 00000000000000..1b0132d25ed591
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @bswap_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 177
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <16 x i16>, ptr %src
+ %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
+ store <16 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @bswap_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i32>, ptr %src
+ %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
+ store <8 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @bswap_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a0, 0
+; CHECK-NEXT: xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT: xvshuf4i.w $xr0, $xr0, 177
+; CHECK-NEXT: xvst $xr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i64>, ptr %src
+ %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
+ store <4 x i64> %res, ptr %dst
+ ret void
+}
+
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
new file mode 100644
index 00000000000000..8172e21eae34df
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @bswap_v8i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 177
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <8 x i16>, ptr %src
+ %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
+ store <8 x i16> %res, ptr %dst
+ ret void
+}
+
+define void @bswap_v4i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <4 x i32>, ptr %src
+ %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
+ store <4 x i32> %res, ptr %dst
+ ret void
+}
+
+define void @bswap_v2i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v2i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vld $vr0, $a0, 0
+; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 177
+; CHECK-NEXT: vst $vr0, $a1, 0
+; CHECK-NEXT: ret
+ %v = load <2 x i64>, ptr %src
+ %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+ store <2 x i64> %res, ptr %dst
+ ret void
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
|
b6fc547
to
20c39cb
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with nits. Thanks.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks.
Needs a rebase, since #114170 has been merged. |
Although the LSX/LASX instruction sets do not appear to include dedicated byte-swap instructions, a byte swap can be implemented with the low-overhead {,X}VSHUF4I family of instructions, which avoids the very inefficient code sequence produced by naïve expansion.
8ba2d94
to
66b4f77
Compare
On top of #114170