-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[SPARC] Promote i32 CTTZ when we have VIS3 #135894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARC] Promote i32 CTTZ when we have VIS3 #135894
Conversation
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
@llvm/pr-subscribers-backend-sparc Author: Koakuma (koachan) Changes: CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Full diff: https://github.com/llvm/llvm-project/pull/135894.diff 2 Files Affected:
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index c7dd96757ebb5..bbc15c3ebf4dc 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1820,7 +1820,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FMA , MVT::f32, Expand);
- setOperationAction(ISD::CTTZ , MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32,
+ Subtarget->isVIS3() ? Promote : Expand);
setOperationAction(ISD::CTLZ, MVT::i32,
Subtarget->isVIS3() ? Promote : Expand);
setOperationAction(ISD::ROTL , MVT::i32, Expand);
@@ -1998,6 +1999,8 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
if (Subtarget->isVIS3()) {
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Legal);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
}
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
diff --git a/llvm/test/CodeGen/SPARC/cttz.ll b/llvm/test/CodeGen/SPARC/cttz.ll
index eab2433a074a2..bc3fa13c105b7 100644
--- a/llvm/test/CodeGen/SPARC/cttz.ll
+++ b/llvm/test/CodeGen/SPARC/cttz.ll
@@ -1,70 +1,195 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=sparc -mcpu=v9 | FileCheck %s
+; RUN: llc < %s -mtriple=sparcv9 | FileCheck %s -check-prefix=V9
+; RUN: llc < %s -mtriple=sparcv9 -mattr=popc | FileCheck %s -check-prefix=POPC
+; RUN: llc < %s -mtriple=sparcv9 -mattr=vis3 | FileCheck %s -check-prefix=VIS3
-define i32 @f(i32 %x) {
-; CHECK-LABEL: f:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: ! %bb.0: ! %entry
-; CHECK-NEXT: sub %g0, %o0, %o1
-; CHECK-NEXT: and %o0, %o1, %o1
-; CHECK-NEXT: sethi 122669, %o2
-; CHECK-NEXT: or %o2, 305, %o2
-; CHECK-NEXT: smul %o1, %o2, %o1
-; CHECK-NEXT: srl %o1, 27, %o1
-; CHECK-NEXT: sethi %hi(.LCPI0_0), %o2
-; CHECK-NEXT: add %o2, %lo(.LCPI0_0), %o2
-; CHECK-NEXT: ldub [%o2+%o1], %o1
-; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: move %icc, 0, %o1
-; CHECK-NEXT: retl
-; CHECK-NEXT: mov %o1, %o0
-entry:
- %0 = call i32 @llvm.cttz.i32(i32 %x, i1 true)
- %1 = icmp eq i32 %x, 0
- %2 = select i1 %1, i32 0, i32 %0
- %3 = trunc i32 %2 to i8
- %conv = zext i8 %3 to i32
- ret i32 %conv
+define i32 @i32_nopoison(i32 %x) nounwind {
+; V9-LABEL: i32_nopoison:
+; V9: ! %bb.0:
+; V9-NEXT: cmp %o0, 0
+; V9-NEXT: be %icc, .LBB0_2
+; V9-NEXT: nop
+; V9-NEXT: ! %bb.1: ! %cond.false
+; V9-NEXT: sub %g0, %o0, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sethi 122669, %o1
+; V9-NEXT: or %o1, 305, %o1
+; V9-NEXT: mulx %o0, %o1, %o0
+; V9-NEXT: srl %o0, 27, %o0
+; V9-NEXT: srl %o0, 0, %o0
+; V9-NEXT: sethi %h44(.LCPI0_0), %o1
+; V9-NEXT: add %o1, %m44(.LCPI0_0), %o1
+; V9-NEXT: sllx %o1, 12, %o1
+; V9-NEXT: add %o1, %l44(.LCPI0_0), %o1
+; V9-NEXT: retl
+; V9-NEXT: ldub [%o1+%o0], %o0
+; V9-NEXT: .LBB0_2:
+; V9-NEXT: retl
+; V9-NEXT: mov 32, %o0
+;
+; POPC-LABEL: i32_nopoison:
+; POPC: ! %bb.0:
+; POPC-NEXT: cmp %o0, 0
+; POPC-NEXT: be %icc, .LBB0_2
+; POPC-NEXT: nop
+; POPC-NEXT: ! %bb.1: ! %cond.false
+; POPC-NEXT: add %o0, -1, %o1
+; POPC-NEXT: andn %o1, %o0, %o0
+; POPC-NEXT: srl %o0, 0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+; POPC-NEXT: .LBB0_2:
+; POPC-NEXT: retl
+; POPC-NEXT: mov 32, %o0
+;
+; VIS3-LABEL: i32_nopoison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: cmp %o0, 0
+; VIS3-NEXT: be %icc, .LBB0_2
+; VIS3-NEXT: nop
+; VIS3-NEXT: ! %bb.1: ! %cond.false
+; VIS3-NEXT: add %o0, -1, %o1
+; VIS3-NEXT: andn %o1, %o0, %o0
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: mov 64, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: sub %o1, %o0, %o0
+; VIS3-NEXT: .LBB0_2:
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov 32, %o0
+ %ret = call i32 @llvm.cttz.i32(i32 %x, i1 false)
+ ret i32 %ret
}
-define i64 @g(i64 %x) {
-; CHECK-LABEL: g:
-; CHECK: .cfi_startproc
-; CHECK-NEXT: ! %bb.0: ! %entry
-; CHECK-NEXT: sub %g0, %o1, %o2
-; CHECK-NEXT: and %o1, %o2, %o2
-; CHECK-NEXT: sethi 122669, %o3
-; CHECK-NEXT: or %o3, 305, %o3
-; CHECK-NEXT: smul %o2, %o3, %o2
-; CHECK-NEXT: sethi %hi(.LCPI1_0), %o4
-; CHECK-NEXT: add %o4, %lo(.LCPI1_0), %o4
-; CHECK-NEXT: sub %g0, %o0, %o5
-; CHECK-NEXT: and %o0, %o5, %o5
-; CHECK-NEXT: smul %o5, %o3, %o3
-; CHECK-NEXT: srl %o3, 27, %o3
-; CHECK-NEXT: ldub [%o4+%o3], %o3
-; CHECK-NEXT: srl %o2, 27, %o2
-; CHECK-NEXT: ldub [%o4+%o2], %o4
-; CHECK-NEXT: add %o3, 32, %o2
-; CHECK-NEXT: cmp %o1, 0
-; CHECK-NEXT: movne %icc, %o4, %o2
-; CHECK-NEXT: or %o1, %o0, %o0
-; CHECK-NEXT: cmp %o0, 0
-; CHECK-NEXT: move %icc, 0, %o2
-; CHECK-NEXT: mov %g0, %o0
-; CHECK-NEXT: retl
-; CHECK-NEXT: mov %o2, %o1
-entry:
- %0 = call i64 @llvm.cttz.i64(i64 %x, i1 true)
- %1 = icmp eq i64 %x, 0
- %2 = select i1 %1, i64 0, i64 %0
- %3 = trunc i64 %2 to i32
- %conv = zext i32 %3 to i64
- ret i64 %conv
+define i32 @i32_poison(i32 %x) nounwind {
+; V9-LABEL: i32_poison:
+; V9: ! %bb.0:
+; V9-NEXT: sub %g0, %o0, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sethi 122669, %o1
+; V9-NEXT: or %o1, 305, %o1
+; V9-NEXT: mulx %o0, %o1, %o0
+; V9-NEXT: srl %o0, 27, %o0
+; V9-NEXT: srl %o0, 0, %o0
+; V9-NEXT: sethi %h44(.LCPI1_0), %o1
+; V9-NEXT: add %o1, %m44(.LCPI1_0), %o1
+; V9-NEXT: sllx %o1, 12, %o1
+; V9-NEXT: add %o1, %l44(.LCPI1_0), %o1
+; V9-NEXT: retl
+; V9-NEXT: ldub [%o1+%o0], %o0
+;
+; POPC-LABEL: i32_poison:
+; POPC: ! %bb.0:
+; POPC-NEXT: add %o0, -1, %o1
+; POPC-NEXT: andn %o1, %o0, %o0
+; POPC-NEXT: srl %o0, 0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+;
+; VIS3-LABEL: i32_poison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: add %o0, -1, %o1
+; VIS3-NEXT: andn %o1, %o0, %o0
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: mov 64, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: sub %o1, %o0, %o0
+ %ret = call i32 @llvm.cttz.i32(i32 %x, i1 true)
+ ret i32 %ret
}
-; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare i32 @llvm.cttz.i32(i32, i1 immarg) #0
-declare i64 @llvm.cttz.i64(i64, i1 immarg) #0
+define i64 @i64_nopoison(i64 %x) nounwind {
+; V9-LABEL: i64_nopoison:
+; V9: ! %bb.0:
+; V9-NEXT: brz %o0, .LBB2_2
+; V9-NEXT: nop
+; V9-NEXT: ! %bb.1: ! %cond.false
+; V9-NEXT: sub %g0, %o0, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sethi 3362647, %o1
+; V9-NEXT: or %o1, 447, %o1
+; V9-NEXT: sethi 34344, %o2
+; V9-NEXT: or %o2, 914, %o2
+; V9-NEXT: sllx %o2, 32, %o2
+; V9-NEXT: or %o2, %o1, %o1
+; V9-NEXT: mulx %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 58, %o0
+; V9-NEXT: sethi %h44(.LCPI2_0), %o1
+; V9-NEXT: add %o1, %m44(.LCPI2_0), %o1
+; V9-NEXT: sllx %o1, 12, %o1
+; V9-NEXT: add %o1, %l44(.LCPI2_0), %o1
+; V9-NEXT: retl
+; V9-NEXT: ldub [%o1+%o0], %o0
+; V9-NEXT: .LBB2_2:
+; V9-NEXT: retl
+; V9-NEXT: mov 64, %o0
+;
+; POPC-LABEL: i64_nopoison:
+; POPC: ! %bb.0:
+; POPC-NEXT: brz %o0, .LBB2_2
+; POPC-NEXT: nop
+; POPC-NEXT: ! %bb.1: ! %cond.false
+; POPC-NEXT: add %o0, -1, %o1
+; POPC-NEXT: andn %o1, %o0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+; POPC-NEXT: .LBB2_2:
+; POPC-NEXT: retl
+; POPC-NEXT: mov 64, %o0
+;
+; VIS3-LABEL: i64_nopoison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: brz %o0, .LBB2_2
+; VIS3-NEXT: nop
+; VIS3-NEXT: ! %bb.1: ! %cond.false
+; VIS3-NEXT: add %o0, -1, %o1
+; VIS3-NEXT: andn %o1, %o0, %o0
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: mov 64, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: sub %o1, %o0, %o0
+; VIS3-NEXT: .LBB2_2:
+; VIS3-NEXT: retl
+; VIS3-NEXT: mov 64, %o0
+ %ret = call i64 @llvm.cttz.i64(i64 %x, i1 false)
+ ret i64 %ret
+}
-attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+define i64 @i64_poison(i64 %x) nounwind {
+; V9-LABEL: i64_poison:
+; V9: ! %bb.0:
+; V9-NEXT: sub %g0, %o0, %o1
+; V9-NEXT: and %o0, %o1, %o0
+; V9-NEXT: sethi 3362647, %o1
+; V9-NEXT: or %o1, 447, %o1
+; V9-NEXT: sethi 34344, %o2
+; V9-NEXT: or %o2, 914, %o2
+; V9-NEXT: sllx %o2, 32, %o2
+; V9-NEXT: or %o2, %o1, %o1
+; V9-NEXT: mulx %o0, %o1, %o0
+; V9-NEXT: srlx %o0, 58, %o0
+; V9-NEXT: sethi %h44(.LCPI3_0), %o1
+; V9-NEXT: add %o1, %m44(.LCPI3_0), %o1
+; V9-NEXT: sllx %o1, 12, %o1
+; V9-NEXT: add %o1, %l44(.LCPI3_0), %o1
+; V9-NEXT: retl
+; V9-NEXT: ldub [%o1+%o0], %o0
+;
+; POPC-LABEL: i64_poison:
+; POPC: ! %bb.0:
+; POPC-NEXT: add %o0, -1, %o1
+; POPC-NEXT: andn %o1, %o0, %o0
+; POPC-NEXT: retl
+; POPC-NEXT: popc %o0, %o0
+;
+; VIS3-LABEL: i64_poison:
+; VIS3: ! %bb.0:
+; VIS3-NEXT: add %o0, -1, %o1
+; VIS3-NEXT: andn %o1, %o0, %o0
+; VIS3-NEXT: lzcnt %o0, %o0
+; VIS3-NEXT: mov 64, %o1
+; VIS3-NEXT: retl
+; VIS3-NEXT: sub %o1, %o0, %o0
+ %ret = call i64 @llvm.cttz.i64(i64 %x, i1 true)
+ ret i64 %ret
+}
|
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one note
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
Is something blocking this PR?
Wait yeah I forgot about this, sorry sorry...
Created using spr 1.3.5 [skip ci]
Created using spr 1.3.5
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm/llvm-project#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native instruction in VIS3. Promote i32 CTTZ in that case so that the native instruction gets used. Reviewers: rorth, brad0, s-barannikov Reviewed By: s-barannikov Pull Request: llvm#135894
CTTZ can be implemented in terms of CTLZ, for which there's a native
instruction in VIS3.
Promote i32 CTTZ in that case so that the native instruction gets used.