Skip to content

[LoongArch] Optimize codegen for ISD::ROTL #100344

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 30, 2024
Merged

[LoongArch] Optimize codegen for ISD::ROTL #100344

merged 2 commits into from
Jul 30, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented Jul 24, 2024

The LoongArch rotr.{w,d} instruction ignores the high bits of the shift operand, allowing it to generate more efficient code using the constant zero register.

The LoongArch rotr.{w,d} instruction ignores the high bits of the shift operand,
allowing it to generate more efficient code using the constant zero register.
@llvmbot
Copy link
Member

llvmbot commented Jul 24, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

The LoongArch rotr.{w,d} instruction ignores the high bits of the shift operand, allowing it to generate more efficient code using the constant zero register.


Full diff: https://github.com/llvm/llvm-project/pull/100344.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+14-2)
  • (modified) llvm/test/CodeGen/LoongArch/rotl-rotr.ll (+6-12)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index d80509cf39849..185a1b9d52e5b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -78,7 +78,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
-  setOperationAction(ISD::ROTL, GRLenVT, Expand);
+  setOperationAction(ISD::ROTL, GRLenVT, Custom);
   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
 
   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
@@ -2661,7 +2661,8 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
     if (N->getOpcode() == ISD::ROTL) {
-      SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
+      SDValue TmpOp = DAG.getConstant(
+          isa<ConstantSDNode>(NewOp1.getNode()) ? 32 : 0, DL, MVT::i64);
       NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
     }
     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
@@ -2833,6 +2834,17 @@ void LoongArchTargetLowering::ReplaceNodeResults(
     }
     break;
   case ISD::ROTL:
+    if (VT == MVT::i32 && Subtarget.is64Bit()) {
+      Results.push_back(customLegalizeToWOp(N, DAG, 2));
+    } else {
+      SDValue Op0 = N->getOperand(0);
+      SDValue TmpOp = DAG.getConstant(
+          isa<ConstantSDNode>(Op0.getNode()) ? Subtarget.getGRLen() : 0, DL,
+          VT);
+      SDValue NewOp1 = DAG.getNode(ISD::SUB, DL, VT, TmpOp, N->getOperand(1));
+      Results.push_back(DAG.getNode(ISD::ROTR, DL, VT, Op0, NewOp1));
+    }
+    break;
   case ISD::ROTR:
     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
            "Unexpected custom legalisation");
diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
index 75461f5820984..774cf614f6099 100644
--- a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
@@ -5,15 +5,13 @@
 define signext i32 @rotl_32(i32 signext %x, i32 signext %y) nounwind {
 ; LA32-LABEL: rotl_32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    ori $a2, $zero, 32
-; LA32-NEXT:    sub.w $a1, $a2, $a1
+; LA32-NEXT:    sub.w $a1, $zero, $a1
 ; LA32-NEXT:    rotr.w $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: rotl_32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ori $a2, $zero, 32
-; LA64-NEXT:    sub.d $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
 ; LA64-NEXT:    rotr.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %z = sub i32 32, %y
@@ -80,8 +78,7 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind {
 ;
 ; LA64-LABEL: rotl_64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ori $a2, $zero, 64
-; LA64-NEXT:    sub.d $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
 ; LA64-NEXT:    rotr.d $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %z = sub i64 64, %y
@@ -149,8 +146,7 @@ define signext i32 @rotl_32_mask(i32 signext %x, i32 signext %y) nounwind {
 ;
 ; LA64-LABEL: rotl_32_mask:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ori $a2, $zero, 32
-; LA64-NEXT:    sub.d $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
 ; LA64-NEXT:    rotr.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %z = sub i32 0, %y
@@ -170,8 +166,7 @@ define signext i32 @rotl_32_mask_and_63_and_31(i32 signext %x, i32 signext %y) n
 ;
 ; LA64-LABEL: rotl_32_mask_and_63_and_31:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ori $a2, $zero, 32
-; LA64-NEXT:    sub.d $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
 ; LA64-NEXT:    rotr.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %a = and i32 %y, 63
@@ -192,8 +187,7 @@ define signext i32 @rotl_32_mask_or_64_or_32(i32 signext %x, i32 signext %y) nou
 ;
 ; LA64-LABEL: rotl_32_mask_or_64_or_32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    ori $a2, $zero, 32
-; LA64-NEXT:    sub.d $a1, $a2, $a1
+; LA64-NEXT:    sub.d $a1, $zero, $a1
 ; LA64-NEXT:    rotr.w $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %a = or i32 %y, 64

@heiher heiher requested review from SixWeining and wangleiat July 24, 2024 12:33
@SixWeining
Copy link
Contributor

@wangleiat What do you think?

@topperc
Copy link
Collaborator

topperc commented Jul 25, 2024

Since LoongArch copied some code from RISC-V, I think you can do this with these 2 tablegen changes.

diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index ec0d071453c3..030e3500d528 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -1119,7 +1119,7 @@ def : PatGprGpr<urem, MOD_WU>;
 def : PatGprGpr<mul, MUL_W>;
 def : PatGprGpr<mulhs, MULH_W>;
 def : PatGprGpr<mulhu, MULH_WU>;
-def : PatGprGpr<rotr, ROTR_W>;
+def : PatGprGpr<shiftop<rotr>, ROTR_W>;
 def : PatGprImm<rotr, ROTRI_W, uimm5>;
 
 foreach Idx = 1...3 in {
@@ -1143,7 +1143,7 @@ def : PatGprGpr_32<srem, MOD_W>;
 def : PatGprGpr<urem, MOD_DU>;
 def : PatGprGpr<loongarch_mod_wu, MOD_WU>;
 def : PatGprGpr<rotr, ROTR_D>;
-def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
+def : PatGprGpr<shiftopw<loongarch_rotr_w>, ROTR_W>;
 def : PatGprImm<rotr, ROTRI_D, uimm6>;
 def : PatGprImm_32<rotr, ROTRI_W, uimm5>;
 def : PatGprImm<loongarch_rotr_w, ROTRI_W, uimm5>;

@heiher
Copy link
Member Author

heiher commented Jul 25, 2024

@topperc Thanks for your suggestion. Your solution is indeed simpler and achieves the same functionality. I'm happy to adopt your approach.

Copy link
Contributor

@wangleiat wangleiat left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, thanks.

@heiher heiher merged commit 3e2631c into llvm:main Jul 30, 2024
7 checks passed
@heiher heiher deleted the opt-rotl branch July 30, 2024 06:22
@llvm-ci
Copy link
Collaborator

llvm-ci commented Jul 30, 2024

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building llvm at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/4584

Here is the relevant piece of the build log for the reference:

Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'AddressSanitizer-x86_64-linux-dynamic :: TestCases/Posix/halt_on_error-torture.cpp' FAILED ********************
Exit Code: 1

Command Output (stderr):
--
RUN: at line 3: /build/buildbot/premerge-monolithic-linux/build/./bin/clang  --driver-mode=g++ -fsanitize=address -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only  -m64  -shared-libasan -fsanitize-recover=address -pthread /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/asan/TestCases/Posix/halt_on_error-torture.cpp -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp
+ /build/buildbot/premerge-monolithic-linux/build/./bin/clang --driver-mode=g++ -fsanitize=address -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -gline-tables-only -m64 -shared-libasan -fsanitize-recover=address -pthread /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/asan/TestCases/Posix/halt_on_error-torture.cpp -o /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp
RUN: at line 5: env ASAN_OPTIONS=halt_on_error=false:suppress_equal_pcs=false  /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp 1 10 >/build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log 2>&1
+ env ASAN_OPTIONS=halt_on_error=false:suppress_equal_pcs=false /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp 1 10
RUN: at line 6: grep 'ERROR: AddressSanitizer: use-after-poison' /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log | count 10
+ grep 'ERROR: AddressSanitizer: use-after-poison' /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log
+ count 10
RUN: at line 7: FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/asan/TestCases/Posix/halt_on_error-torture.cpp </build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log
+ FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/compiler-rt/test/asan/TestCases/Posix/halt_on_error-torture.cpp
RUN: at line 9: env ASAN_OPTIONS=halt_on_error=false:suppress_equal_pcs=false:exitcode=0  /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp 10 20 >/build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log 2>&1
+ env ASAN_OPTIONS=halt_on_error=false:suppress_equal_pcs=false:exitcode=0 /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp 10 20
RUN: at line 10: grep 'ERROR: AddressSanitizer: use-after-poison' /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log | count 200
+ grep 'ERROR: AddressSanitizer: use-after-poison' /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log
+ count 200
grep: /build/buildbot/premerge-monolithic-linux/build/runtimes/runtimes-bins/compiler-rt/test/asan/X86_64LinuxDynamicConfig/TestCases/Posix/Output/halt_on_error-torture.cpp.tmp.log: binary file matches
Expected 200 lines, got 0.

--

********************


Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

6 participants