Skip to content

[SPARC] Promote i32 CTTZ when we have VIS3 #135894

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged

Conversation

koachan
Copy link
Contributor

@koachan koachan commented Apr 16, 2025

CTTZ can be implemented in terms of CTLZ (for nonzero x,
cttz(x) = 64 - ctlz(~x & (x - 1)) on the value promoted to i64), and VIS3
provides a native CTLZ instruction (lzcnt).
Promote i32 CTTZ in that case so that the native instruction gets used.

koachan added 2 commits April 16, 2025 08:28
Created using spr 1.3.5
@llvmbot
Copy link
Member

llvmbot commented Apr 16, 2025

@llvm/pr-subscribers-backend-sparc

Author: Koakuma (koachan)

Changes

CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.


Full diff: https://github.com/llvm/llvm-project/pull/135894.diff

2 Files Affected:

  • (modified) llvm/lib/Target/Sparc/SparcISelLowering.cpp (+4-1)
  • (modified) llvm/test/CodeGen/SPARC/cttz.ll (+188-63)
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index c7dd96757ebb5..bbc15c3ebf4dc 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1820,7 +1820,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
   setOperationAction(ISD::FREM , MVT::f32, Expand);
   setOperationAction(ISD::FMA  , MVT::f32, Expand);
-  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i32,
+                     Subtarget->isVIS3() ? Promote : Expand);
   setOperationAction(ISD::CTLZ, MVT::i32,
                      Subtarget->isVIS3() ? Promote : Expand);
   setOperationAction(ISD::ROTL , MVT::i32, Expand);
@@ -1998,6 +1999,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
   if (Subtarget->isVIS3()) {
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote);
+    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
   }
 
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
diff --git a/llvm/test/CodeGen/SPARC/cttz.ll b/llvm/test/CodeGen/SPARC/cttz.ll
index eab2433a074a2..bc3fa13c105b7 100644
--- a/llvm/test/CodeGen/SPARC/cttz.ll
+++ b/llvm/test/CodeGen/SPARC/cttz.ll
@@ -1,70 +1,195 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=sparc -mcpu=v9 | FileCheck %s
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
 
-define i32 @f(i32 %x) {
-; CHECK-LABEL: f:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  ! %bb.0: ! %entry
-; CHECK-NEXT:    sub %g0, %o0, %o1
-; CHECK-NEXT:    and %o0, %o1, %o1
-; CHECK-NEXT:    sethi 122669, %o2
-; CHECK-NEXT:    or %o2, 305, %o2
-; CHECK-NEXT:    smul %o1, %o2, %o1
-; CHECK-NEXT:    srl %o1, 27, %o1
-; CHECK-NEXT:    sethi %hi(.LCPI0_0), %o2
-; CHECK-NEXT:    add %o2, %lo(.LCPI0_0), %o2
-; CHECK-NEXT:    ldub [%o2+%o1], %o1
-; CHECK-NEXT:    cmp %o0, 0
-; CHECK-NEXT:    move %icc, 0, %o1
-; CHECK-NEXT:    retl
-; CHECK-NEXT:    mov %o1, %o0
-entry:
-  %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
-  %1 = icmp eq i32 %x, 0
-  %2 = select i1 %1, i32 0, i32 %0
-  %3 = trunc i32 %2 to i8
-  %conv = zext i8 %3 to i32
-  ret i32 %conv
+define i32 @i32_nopoison(i32 %x) nounwind {
+; V9-LABEL: i32_nopoison:
+; V9:       ! %bb.0:
+; V9-NEXT:    cmp %o0, 0
+; V9-NEXT:    be %icc, .LBB0_2
+; V9-NEXT:    nop
+; V9-NEXT:  ! %bb.1: ! %cond.false
+; V9-NEXT:    sub %g0, %o0, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sethi 122669, %o1
+; V9-NEXT:    or %o1, 305, %o1
+; V9-NEXT:    mulx %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 27, %o0
+; V9-NEXT:    srl %o0, 0, %o0
+; V9-NEXT:    sethi %h44(.LCPI0_0), %o1
+; V9-NEXT:    add %o1, %m44(.LCPI0_0), %o1
+; V9-NEXT:    sllx %o1, 12, %o1
+; V9-NEXT:    add %o1, %l44(.LCPI0_0), %o1
+; V9-NEXT:    retl
+; V9-NEXT:    ldub [%o1+%o0], %o0
+; V9-NEXT:  .LBB0_2:
+; V9-NEXT:    retl
+; V9-NEXT:    mov 32, %o0
+;
+; POPC-LABEL: i32_nopoison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    cmp %o0, 0
+; POPC-NEXT:    be %icc, .LBB0_2
+; POPC-NEXT:    nop
+; POPC-NEXT:  ! %bb.1: ! %cond.false
+; POPC-NEXT:    add %o0, -1, %o1
+; POPC-NEXT:    andn %o1, %o0, %o0
+; POPC-NEXT:    srl %o0, 0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+; POPC-NEXT:  .LBB0_2:
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov 32, %o0
+;
+; VIS3-LABEL: i32_nopoison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    cmp %o0, 0
+; VIS3-NEXT:    be %icc, .LBB0_2
+; VIS3-NEXT:    nop
+; VIS3-NEXT:  ! %bb.1: ! %cond.false
+; VIS3-NEXT:    add %o0, -1, %o1
+; VIS3-NEXT:    andn %o1, %o0, %o0
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:    mov 64, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    sub %o1, %o0, %o0
+; VIS3-NEXT:  .LBB0_2:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov 32, %o0
+  %ret = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+  ret i32 %ret
 }
 
-define i64 @g(i64 %x) {
-; CHECK-LABEL: g:
-; CHECK:         .cfi_startproc
-; CHECK-NEXT:  ! %bb.0: ! %entry
-; CHECK-NEXT:    sub %g0, %o1, %o2
-; CHECK-NEXT:    and %o1, %o2, %o2
-; CHECK-NEXT:    sethi 122669, %o3
-; CHECK-NEXT:    or %o3, 305, %o3
-; CHECK-NEXT:    smul %o2, %o3, %o2
-; CHECK-NEXT:    sethi %hi(.LCPI1_0), %o4
-; CHECK-NEXT:    add %o4, %lo(.LCPI1_0), %o4
-; CHECK-NEXT:    sub %g0, %o0, %o5
-; CHECK-NEXT:    and %o0, %o5, %o5
-; CHECK-NEXT:    smul %o5, %o3, %o3
-; CHECK-NEXT:    srl %o3, 27, %o3
-; CHECK-NEXT:    ldub [%o4+%o3], %o3
-; CHECK-NEXT:    srl %o2, 27, %o2
-; CHECK-NEXT:    ldub [%o4+%o2], %o4
-; CHECK-NEXT:    add %o3, 32, %o2
-; CHECK-NEXT:    cmp %o1, 0
-; CHECK-NEXT:    movne %icc, %o4, %o2
-; CHECK-NEXT:    or %o1, %o0, %o0
-; CHECK-NEXT:    cmp %o0, 0
-; CHECK-NEXT:    move %icc, 0, %o2
-; CHECK-NEXT:    mov %g0, %o0
-; CHECK-NEXT:    retl
-; CHECK-NEXT:    mov %o2, %o1
-entry:
-  %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
-  %1 = icmp eq i64 %x, 0
-  %2 = select i1 %1, i64 0, i64 %0
-  %3 = trunc i64 %2 to i32
-  %conv = zext i32 %3 to i64
-  ret i64 %conv
+define i32 @i32_poison(i32 %x) nounwind {
+; V9-LABEL: i32_poison:
+; V9:       ! %bb.0:
+; V9-NEXT:    sub %g0, %o0, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sethi 122669, %o1
+; V9-NEXT:    or %o1, 305, %o1
+; V9-NEXT:    mulx %o0, %o1, %o0
+; V9-NEXT:    srl %o0, 27, %o0
+; V9-NEXT:    srl %o0, 0, %o0
+; V9-NEXT:    sethi %h44(.LCPI1_0), %o1
+; V9-NEXT:    add %o1, %m44(.LCPI1_0), %o1
+; V9-NEXT:    sllx %o1, 12, %o1
+; V9-NEXT:    add %o1, %l44(.LCPI1_0), %o1
+; V9-NEXT:    retl
+; V9-NEXT:    ldub [%o1+%o0], %o0
+;
+; POPC-LABEL: i32_poison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    add %o0, -1, %o1
+; POPC-NEXT:    andn %o1, %o0, %o0
+; POPC-NEXT:    srl %o0, 0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+;
+; VIS3-LABEL: i32_poison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    add %o0, -1, %o1
+; VIS3-NEXT:    andn %o1, %o0, %o0
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:    mov 64, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    sub %o1, %o0, %o0
+  %ret = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+  ret i32 %ret
 }
 
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.cttz.i32(i32, i1 immarg) #0
-declare i64 @llvm.cttz.i64(i64, i1 immarg) #0
+define i64 @i64_nopoison(i64 %x) nounwind {
+; V9-LABEL: i64_nopoison:
+; V9:       ! %bb.0:
+; V9-NEXT:    brz %o0, .LBB2_2
+; V9-NEXT:    nop
+; V9-NEXT:  ! %bb.1: ! %cond.false
+; V9-NEXT:    sub %g0, %o0, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sethi 3362647, %o1
+; V9-NEXT:    or %o1, 447, %o1
+; V9-NEXT:    sethi 34344, %o2
+; V9-NEXT:    or %o2, 914, %o2
+; V9-NEXT:    sllx %o2, 32, %o2
+; V9-NEXT:    or %o2, %o1, %o1
+; V9-NEXT:    mulx %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 58, %o0
+; V9-NEXT:    sethi %h44(.LCPI2_0), %o1
+; V9-NEXT:    add %o1, %m44(.LCPI2_0), %o1
+; V9-NEXT:    sllx %o1, 12, %o1
+; V9-NEXT:    add %o1, %l44(.LCPI2_0), %o1
+; V9-NEXT:    retl
+; V9-NEXT:    ldub [%o1+%o0], %o0
+; V9-NEXT:  .LBB2_2:
+; V9-NEXT:    retl
+; V9-NEXT:    mov 64, %o0
+;
+; POPC-LABEL: i64_nopoison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    brz %o0, .LBB2_2
+; POPC-NEXT:    nop
+; POPC-NEXT:  ! %bb.1: ! %cond.false
+; POPC-NEXT:    add %o0, -1, %o1
+; POPC-NEXT:    andn %o1, %o0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+; POPC-NEXT:  .LBB2_2:
+; POPC-NEXT:    retl
+; POPC-NEXT:    mov 64, %o0
+;
+; VIS3-LABEL: i64_nopoison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    brz %o0, .LBB2_2
+; VIS3-NEXT:    nop
+; VIS3-NEXT:  ! %bb.1: ! %cond.false
+; VIS3-NEXT:    add %o0, -1, %o1
+; VIS3-NEXT:    andn %o1, %o0, %o0
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:    mov 64, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    sub %o1, %o0, %o0
+; VIS3-NEXT:  .LBB2_2:
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    mov 64, %o0
+  %ret = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+  ret i64 %ret
+}
 
-attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+define i64 @i64_poison(i64 %x) nounwind {
+; V9-LABEL: i64_poison:
+; V9:       ! %bb.0:
+; V9-NEXT:    sub %g0, %o0, %o1
+; V9-NEXT:    and %o0, %o1, %o0
+; V9-NEXT:    sethi 3362647, %o1
+; V9-NEXT:    or %o1, 447, %o1
+; V9-NEXT:    sethi 34344, %o2
+; V9-NEXT:    or %o2, 914, %o2
+; V9-NEXT:    sllx %o2, 32, %o2
+; V9-NEXT:    or %o2, %o1, %o1
+; V9-NEXT:    mulx %o0, %o1, %o0
+; V9-NEXT:    srlx %o0, 58, %o0
+; V9-NEXT:    sethi %h44(.LCPI3_0), %o1
+; V9-NEXT:    add %o1, %m44(.LCPI3_0), %o1
+; V9-NEXT:    sllx %o1, 12, %o1
+; V9-NEXT:    add %o1, %l44(.LCPI3_0), %o1
+; V9-NEXT:    retl
+; V9-NEXT:    ldub [%o1+%o0], %o0
+;
+; POPC-LABEL: i64_poison:
+; POPC:       ! %bb.0:
+; POPC-NEXT:    add %o0, -1, %o1
+; POPC-NEXT:    andn %o1, %o0, %o0
+; POPC-NEXT:    retl
+; POPC-NEXT:    popc %o0, %o0
+;
+; VIS3-LABEL: i64_poison:
+; VIS3:       ! %bb.0:
+; VIS3-NEXT:    add %o0, -1, %o1
+; VIS3-NEXT:    andn %o1, %o0, %o0
+; VIS3-NEXT:    lzcnt %o0, %o0
+; VIS3-NEXT:    mov 64, %o1
+; VIS3-NEXT:    retl
+; VIS3-NEXT:    sub %o1, %o0, %o0
+  %ret = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+  ret i64 %ret
+}

koachan added 2 commits April 17, 2025 05:16
Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@koachan koachan requested a review from s-barannikov April 16, 2025 22:17
koachan added 2 commits April 17, 2025 22:14
Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@brad0 brad0 requested a review from s-barannikov April 20, 2025 04:46
Copy link
Contributor

@s-barannikov s-barannikov left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one note

koachan added 2 commits April 20, 2025 14:51
Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@s-barannikov
Copy link
Contributor

Is something blocking this PR?

@koachan
Copy link
Contributor Author

koachan commented Apr 26, 2025

Wait yeah I forgot about this, sorry sorry...
Lemme merge this~

koachan added 2 commits April 26, 2025 22:12
Created using spr 1.3.5

[skip ci]
Created using spr 1.3.5
@koachan koachan changed the base branch from users/koachan/spr/main.sparc-promote-i32-cttz-when-we-have-vis3 to main April 26, 2025 15:16
@koachan koachan merged commit 41f7a85 into main Apr 26, 2025
9 of 17 checks passed
@koachan koachan deleted the users/koachan/spr/sparc-promote-i32-cttz-when-we-have-vis3 branch April 26, 2025 15:16
jyli0116 pushed a commit to jyli0116/llvm-project that referenced this pull request Apr 28, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm#135894
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request May 6, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm/llvm-project#135894
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm#135894
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm#135894
IanWood1 pushed a commit to IanWood1/llvm-project that referenced this pull request May 6, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm#135894
Ankur-0429 pushed a commit to Ankur-0429/llvm-project that referenced this pull request May 9, 2025
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.

Reviewers: rorth, brad0, s-barannikov

Reviewed By: s-barannikov

Pull Request: llvm#135894
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants