-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add is.shared/is.private intrinsics to isBoolSGPR #141804
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
AMDGPU: Add is.shared/is.private intrinsics to isBoolSGPR #141804
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking.
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes: No change in the net output since these ultimately expand to setcc, but saves a step in the DAG. Full diff: https://github.com/llvm/llvm-project/pull/141804.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7ad10454e7931..b124f02d32a8a 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -11922,6 +11922,18 @@ bool llvm::isBoolSGPR(SDValue V) {
case ISD::SMULO:
case ISD::UMULO:
return V.getResNo() == 1;
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntrinsicID = V.getConstantOperandVal(0);
+ switch (IntrinsicID) {
+ case Intrinsic::amdgcn_is_shared:
+ case Intrinsic::amdgcn_is_private:
+ return true;
+ default:
+ return false;
+ }
+
+ return false;
+ }
}
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 1778fa42fbf7e..ba8abdc17fb05 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -740,6 +740,54 @@ bb:
ret void
}
+define i32 @add_sext_bool_is_shared(ptr %ptr, i32 %y) {
+; GCN-LABEL: add_sext_bool_is_shared:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], 0xe8
+; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; GCN-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_sext_bool_is_shared:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], src_shared_base
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s5, v1
+; GFX9-NEXT: v_subbrev_co_u32_e32 v0, vcc, 0, v2, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr)
+ %sext = sext i1 %is.shared to i32
+ %add = add i32 %sext, %y
+ ret i32 %add
+}
+
+define i32 @add_sext_bool_is_private(ptr %ptr, i32 %y) {
+; GCN-LABEL: add_sext_bool_is_private:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[4:5], 0xe4
+; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s4, v1
+; GCN-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v2, vcc
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: add_sext_bool_is_private:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b64 s[4:5], src_private_base
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, s5, v1
+; GFX9-NEXT: v_subbrev_co_u32_e32 v0, vcc, 0, v2, vcc
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+ %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr)
+ %sext = sext i1 %is.private to i32
+ %add = add i32 %sext, %y
+ ret i32 %add
+}
+
declare i1 @llvm.amdgcn.class.f32(float, i32) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
return false;
}

return false;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: llvm_unreachable?
2048248
to
7c026d8
Compare
No change in the net output since these ultimately expand to setcc, but saves a step in the DAG.
6967e64
to
52d2ff5
Compare
No change in the net output since these ultimately expand to setcc, but saves a step in the DAG.
No change in the net output since these ultimately expand to setcc, but saves a step in the DAG.
No change in the net output since these ultimately expand to setcc,
but saves a step in the DAG.