Skip to content

[LoongArch] Support bswap for LSX/LASX VTs #114171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 31, 2024
Merged

Conversation

xen0n
Copy link
Contributor

@xen0n xen0n commented Oct 30, 2024

On top of #114170

@llvmbot
Copy link
Member

llvmbot commented Oct 30, 2024

@llvm/pr-subscribers-backend-loongarch

Author: WÁNG Xuěruì (xen0n)

Changes

On top of #114170


Full diff: https://github.com/llvm/llvm-project/pull/114171.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+6)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+6)
  • (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+6)
  • (added) llvm/test/CodeGen/LoongArch/lasx/bswap.ll (+46)
  • (added) llvm/test/CodeGen/LoongArch/lsx/bswap.ll (+46)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index e2c644a56c95b0..f7d0da265d9b94 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -269,6 +269,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
     }
+    for (MVT VT : {MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+      setOperationAction(ISD::BSWAP, VT, Legal);
+    }
     for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
@@ -317,6 +320,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
           Expand);
     }
+    for (MVT VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
+      setOperationAction(ISD::BSWAP, VT, Legal);
+    }
     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d13cc9af135b57..3e39e2c10a617a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1444,6 +1444,12 @@ def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
 def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
           (XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
 
+// Vector bswaps
+def : Pat<(bswap (v16i16 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b10110001)>;
+def : Pat<(bswap (v8i32 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b00011011)>;
+def : Pat<(bswap (v4i64 LASX256:$xj)),
+          (XVSHUF4I_W (XVSHUF4I_B LASX256:$xj, 0b00011011), 0b10110001)>;
+
 // XVFADD_{S/D}
 defm : PatXrXrF<fadd, "XVFADD">;
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 86aa6dcfd8261f..525d2802daa235 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1600,6 +1600,12 @@ def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
 def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
           (VBITREVI_D LSX128:$vj, uimm6:$imm)>;
 
+// Vector bswaps
+def : Pat<(bswap (v8i16 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b10110001)>;
+def : Pat<(bswap (v4i32 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b00011011)>;
+def : Pat<(bswap (v2i64 LSX128:$vj)),
+          (VSHUF4I_W (VSHUF4I_B LSX128:$vj, 0b00011011), 0b10110001)>;
+
 // VFADD_{S/D}
 defm : PatVrVrF<fadd, "VFADD">;
 
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bswap.ll b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
new file mode 100644
index 00000000000000..1b0132d25ed591
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/bswap.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define void @bswap_v16i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 177
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <16 x i16>, ptr %src
+  %res = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> %v)
+  store <16 x i16> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v8i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i32>, ptr %src
+  %res = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> %v)
+  store <8 x i32> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v4i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvshuf4i.b $xr0, $xr0, 27
+; CHECK-NEXT:    xvshuf4i.w $xr0, $xr0, 177
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i64>, ptr %src
+  %res = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> %v)
+  store <4 x i64> %res, ptr %dst
+  ret void
+}
+
+declare <16 x i16> @llvm.bswap.v16i16(<16 x i16>)
+declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>)
+declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bswap.ll b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
new file mode 100644
index 00000000000000..8172e21eae34df
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/bswap.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define void @bswap_v8i16(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 177
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <8 x i16>, ptr %src
+  %res = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %v)
+  store <8 x i16> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v4i32(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <4 x i32>, ptr %src
+  %res = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %v)
+  store <4 x i32> %res, ptr %dst
+  ret void
+}
+
+define void @bswap_v2i64(ptr %src, ptr %dst) nounwind {
+; CHECK-LABEL: bswap_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.b $vr0, $vr0, 27
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 177
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+  %v = load <2 x i64>, ptr %src
+  %res = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %v)
+  store <2 x i64> %res, ptr %dst
+  ret void
+}
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

@xen0n
Copy link
Contributor Author

xen0n commented Oct 30, 2024

Copy link
Member

@heiher heiher left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with nits. Thanks.

Copy link
Contributor

@SixWeining SixWeining left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thanks.

@SixWeining
Copy link
Contributor

need rebase since #114170 is merged

While the LSX/LASX instruction sets seem to not include byteswap
functionality, it is actually possible through the low-overhead
{,X}VSHUF4I family of instructions, in contrast to the naïvely expanded
code sequence which is very inefficient.
@heiher heiher merged commit f246b5f into llvm:main Oct 31, 2024
8 checks passed
@xen0n xen0n deleted the loong-simd-bswap branch October 31, 2024 18:08
smallp-o-p pushed a commit to smallp-o-p/llvm-project that referenced this pull request Nov 3, 2024
NoumanAmir657 pushed a commit to NoumanAmir657/llvm-project that referenced this pull request Nov 4, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants