Skip to content

Commit 5b5dd06

Browse files
committed
[NVPTX] Add TTI support for folding isspacep in InferAS
1 parent 6ff8091 commit 5b5dd06

File tree

3 files changed

+209
-24
lines changed

3 files changed

+209
-24
lines changed

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp

Lines changed: 59 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -416,33 +416,34 @@ static Instruction *convertNvvmIntrinsicToLlvm(InstCombiner &IC,
416416
llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
417417
}
418418

419+
// Statically answers the `nvvm.isspacep.*` query identified by `IID` for a
// pointer whose address space is `AS`.
// Returns true/false when we know the answer, nullopt otherwise.
// For any `AS` other than generic or param, `IID` must be one of the isspacep
// intrinsics listed in the switch; anything else hits llvm_unreachable.
420+
static std::optional<bool> evaluateIsSpace(Intrinsic::ID IID, unsigned AS) {
421+
if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
422+
AS == NVPTXAS::ADDRESS_SPACE_PARAM)
423+
return std::nullopt; // Got to check at run-time.
424+
switch (IID) {
425+
case Intrinsic::nvvm_isspacep_global:
426+
return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
427+
case Intrinsic::nvvm_isspacep_local:
428+
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
429+
case Intrinsic::nvvm_isspacep_shared:
430+
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
431+
case Intrinsic::nvvm_isspacep_shared_cluster:
432+
// We can't tell shared from shared_cluster at compile time from AS alone,
433+
// but it can't be either if AS is not shared.
434+
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
435+
: std::optional{false};
436+
case Intrinsic::nvvm_isspacep_const:
437+
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
438+
default:
439+
llvm_unreachable("Unexpected intrinsic");
440+
}
441+
}
442+
419443
// Returns an instruction pointer (may be nullptr if we do not know the answer).
420444
// Returns nullopt if `II` is not one of the `isspacep` intrinsics.
421445
static std::optional<Instruction *>
422446
handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
423-
// Returns true/false when we know the answer, nullopt otherwise.
424-
auto CheckASMatch = [](unsigned IID, unsigned AS) -> std::optional<bool> {
425-
if (AS == NVPTXAS::ADDRESS_SPACE_GENERIC ||
426-
AS == NVPTXAS::ADDRESS_SPACE_PARAM)
427-
return std::nullopt; // Got to check at run-time.
428-
switch (IID) {
429-
case Intrinsic::nvvm_isspacep_global:
430-
return AS == NVPTXAS::ADDRESS_SPACE_GLOBAL;
431-
case Intrinsic::nvvm_isspacep_local:
432-
return AS == NVPTXAS::ADDRESS_SPACE_LOCAL;
433-
case Intrinsic::nvvm_isspacep_shared:
434-
return AS == NVPTXAS::ADDRESS_SPACE_SHARED;
435-
case Intrinsic::nvvm_isspacep_shared_cluster:
436-
// We can't tell shared from shared_cluster at compile time from AS alone,
437-
// but it can't be either is AS is not shared.
438-
return AS == NVPTXAS::ADDRESS_SPACE_SHARED ? std::nullopt
439-
: std::optional{false};
440-
case Intrinsic::nvvm_isspacep_const:
441-
return AS == NVPTXAS::ADDRESS_SPACE_CONST;
442-
default:
443-
llvm_unreachable("Unexpected intrinsic");
444-
}
445-
};
446447

447448
switch (auto IID = II.getIntrinsicID()) {
448449
case Intrinsic::nvvm_isspacep_global:
@@ -458,7 +459,7 @@ handleSpaceCheckIntrinsics(InstCombiner &IC, IntrinsicInst &II) {
458459
if (auto *ASCO = dyn_cast<AddrSpaceCastOperator>(Op0))
459460
AS = ASCO->getOperand(0)->getType()->getPointerAddressSpace();
460461

461-
if (std::optional<bool> Answer = CheckASMatch(IID, AS))
462+
if (std::optional<bool> Answer = evaluateIsSpace(IID, AS))
462463
return IC.replaceInstUsesWith(II,
463464
ConstantInt::get(II.getType(), *Answer));
464465
return nullptr; // Don't know the answer, got to check at run time.
@@ -525,3 +526,37 @@ void NVPTXTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
525526
TTI::PeelingPreferences &PP) {
526527
BaseT::getPeelingPreferences(L, SE, PP);
527528
}
529+
530+
// For the `nvvm.isspacep.*` intrinsics, appends operand index 0 to
// `OpIndexes` and returns true. For any other `IID`, leaves `OpIndexes`
// untouched and returns false.
// NOTE(review): presumably queried by the infer-address-spaces pass to learn
// which intrinsic operands are flat pointers it may rewrite -- confirm against
// the TargetTransformInfo hook of the same name.
bool NVPTXTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
531+
Intrinsic::ID IID) const {
532+
switch (IID) {
533+
case Intrinsic::nvvm_isspacep_const:
534+
case Intrinsic::nvvm_isspacep_global:
535+
case Intrinsic::nvvm_isspacep_local:
536+
case Intrinsic::nvvm_isspacep_shared:
537+
case Intrinsic::nvvm_isspacep_shared_cluster: {
538+
OpIndexes.push_back(0); // The queried pointer is operand 0.
539+
return true;
540+
}
541+
}
542+
return false;
543+
}
544+
545+
// Tries to fold the `nvvm.isspacep.*` call `II` to a constant, using the
// address space of `NewV`'s pointer type.
// Returns a constant of `II`'s result type when evaluateIsSpace gives a
// definite answer; returns nullptr when the answer is unknown or `II` is not
// one of the isspacep intrinsics. `OldV` is not used by this implementation.
// NOTE(review): presumably the caller replaces `II` with the returned value
// and treats nullptr as "no rewrite" -- confirm against the caller.
Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
546+
Value *OldV,
547+
Value *NewV) const {
548+
const Intrinsic::ID IID = II->getIntrinsicID();
549+
switch (IID) {
550+
case Intrinsic::nvvm_isspacep_const:
551+
case Intrinsic::nvvm_isspacep_global:
552+
case Intrinsic::nvvm_isspacep_local:
553+
case Intrinsic::nvvm_isspacep_shared:
554+
case Intrinsic::nvvm_isspacep_shared_cluster: {
555+
const unsigned NewAS = NewV->getType()->getPointerAddressSpace();
556+
// A nullopt here means the answer is not known at compile time.
if (const auto R = evaluateIsSpace(IID, NewAS))
557+
return ConstantInt::get(II->getType(), *R);
558+
return nullptr;
559+
}
560+
}
561+
return nullptr;
562+
}

llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {
123123
return true;
124124
}
125125
}
126+
127+
// Appends to `OpIndexes` the operand indexes of intrinsic `IID` that this
// target reports as flat-address operands. Returns true if `IID` is handled,
// false otherwise.
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
128+
Intrinsic::ID IID) const;
129+
130+
// Returns a replacement value for intrinsic call `II` given that its pointer
// operand `OldV` is being replaced by `NewV`, or nullptr when no replacement
// is available.
// NOTE(review): the OldV/NewV roles are inferred from the parameter names --
// confirm against the caller.
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
131+
Value *NewV) const;
126132
};
127133

128134
} // end namespace llvm
Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=nvptx64-nvidia-cuda -passes=infer-address-spaces,bdce %s | FileCheck %s
3+
4+
target datalayout = "e-p:64:64:64-p3:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-i128:128:128-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
5+
target triple = "nvptx64-nvidia-cuda"
6+
7+
declare i1 @llvm.nvvm.isspacep.const(ptr) readnone noinline
8+
declare i1 @llvm.nvvm.isspacep.global(ptr) readnone noinline
9+
declare i1 @llvm.nvvm.isspacep.local(ptr) readnone noinline
10+
declare i1 @llvm.nvvm.isspacep.shared(ptr) readnone noinline
11+
declare i1 @llvm.nvvm.isspacep.shared.cluster(ptr) readnone noinline
12+
13+
define i1 @test_isspacep_const_true(ptr addrspace(4) %addr) {
14+
; CHECK-LABEL: define i1 @test_isspacep_const_true(
15+
; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
16+
; CHECK-NEXT: [[ENTRY:.*:]]
17+
; CHECK-NEXT: ret i1 true
18+
;
19+
entry:
20+
%addr0 = addrspacecast ptr addrspace(4) %addr to ptr
21+
%addr1 = getelementptr i8, ptr %addr0, i32 10
22+
%val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
23+
ret i1 %val
24+
}
25+
26+
define i1 @test_isspacep_const_false(ptr addrspace(1) %addr) {
27+
; CHECK-LABEL: define i1 @test_isspacep_const_false(
28+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
29+
; CHECK-NEXT: [[ENTRY:.*:]]
30+
; CHECK-NEXT: ret i1 false
31+
;
32+
entry:
33+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
34+
%addr1 = getelementptr i8, ptr %addr0, i32 10
35+
%val = call i1 @llvm.nvvm.isspacep.const(ptr %addr1)
36+
ret i1 %val
37+
}
38+
39+
define i1 @test_isspacep_global_true(ptr addrspace(1) %addr) {
40+
; CHECK-LABEL: define i1 @test_isspacep_global_true(
41+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
42+
; CHECK-NEXT: [[ENTRY:.*:]]
43+
; CHECK-NEXT: ret i1 true
44+
;
45+
entry:
46+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
47+
%addr1 = getelementptr i8, ptr %addr0, i32 10
48+
%val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
49+
ret i1 %val
50+
}
51+
52+
define i1 @test_isspacep_global_false(ptr addrspace(4) %addr) {
53+
; CHECK-LABEL: define i1 @test_isspacep_global_false(
54+
; CHECK-SAME: ptr addrspace(4) [[ADDR:%.*]]) {
55+
; CHECK-NEXT: [[ENTRY:.*:]]
56+
; CHECK-NEXT: ret i1 false
57+
;
58+
entry:
59+
%addr0 = addrspacecast ptr addrspace(4) %addr to ptr
60+
%addr1 = getelementptr i8, ptr %addr0, i32 10
61+
%val = call i1 @llvm.nvvm.isspacep.global(ptr %addr1)
62+
ret i1 %val
63+
}
64+
65+
define i1 @test_isspacep_local_true(ptr addrspace(5) %addr) {
66+
; CHECK-LABEL: define i1 @test_isspacep_local_true(
67+
; CHECK-SAME: ptr addrspace(5) [[ADDR:%.*]]) {
68+
; CHECK-NEXT: [[ENTRY:.*:]]
69+
; CHECK-NEXT: ret i1 true
70+
;
71+
entry:
72+
%addr0 = addrspacecast ptr addrspace(5) %addr to ptr
73+
%addr1 = getelementptr i8, ptr %addr0, i32 10
74+
%val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
75+
ret i1 %val
76+
}
77+
78+
define i1 @test_isspacep_local_false(ptr addrspace(1) %addr) {
79+
; CHECK-LABEL: define i1 @test_isspacep_local_false(
80+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
81+
; CHECK-NEXT: [[ENTRY:.*:]]
82+
; CHECK-NEXT: ret i1 false
83+
;
84+
entry:
85+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
86+
%addr1 = getelementptr i8, ptr %addr0, i32 10
87+
%val = call i1 @llvm.nvvm.isspacep.local(ptr %addr1)
88+
ret i1 %val
89+
}
90+
91+
define i1 @test_isspacep_shared_true(ptr addrspace(3) %addr) {
92+
; CHECK-LABEL: define i1 @test_isspacep_shared_true(
93+
; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
94+
; CHECK-NEXT: [[ENTRY:.*:]]
95+
; CHECK-NEXT: ret i1 true
96+
;
97+
entry:
98+
%addr0 = addrspacecast ptr addrspace(3) %addr to ptr
99+
%addr1 = getelementptr i8, ptr %addr0, i32 10
100+
%val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
101+
ret i1 %val
102+
}
103+
104+
define i1 @test_isspacep_shared_false(ptr addrspace(1) %addr) {
105+
; CHECK-LABEL: define i1 @test_isspacep_shared_false(
106+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
107+
; CHECK-NEXT: [[ENTRY:.*:]]
108+
; CHECK-NEXT: ret i1 false
109+
;
110+
entry:
111+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
112+
%addr1 = getelementptr i8, ptr %addr0, i32 10
113+
%val = call i1 @llvm.nvvm.isspacep.shared(ptr %addr1)
114+
ret i1 %val
115+
}
116+
117+
define i1 @test_isspacep_cluster_shared_unsure(ptr addrspace(3) %addr) {
118+
; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_unsure(
119+
; CHECK-SAME: ptr addrspace(3) [[ADDR:%.*]]) {
120+
; CHECK-NEXT: [[ENTRY:.*:]]
121+
; CHECK-NEXT: [[ADDR1:%.*]] = getelementptr i8, ptr addrspace(3) [[ADDR]], i32 10
122+
; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) [[ADDR1]] to ptr
123+
; CHECK-NEXT: [[VAL:%.*]] = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr [[TMP0]])
124+
; CHECK-NEXT: ret i1 [[VAL]]
125+
;
126+
entry:
127+
%addr0 = addrspacecast ptr addrspace(3) %addr to ptr
128+
%addr1 = getelementptr i8, ptr %addr0, i32 10
129+
%val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
130+
ret i1 %val
131+
}
132+
133+
define i1 @test_isspacep_cluster_shared_false(ptr addrspace(1) %addr) {
134+
; CHECK-LABEL: define i1 @test_isspacep_cluster_shared_false(
135+
; CHECK-SAME: ptr addrspace(1) [[ADDR:%.*]]) {
136+
; CHECK-NEXT: [[ENTRY:.*:]]
137+
; CHECK-NEXT: ret i1 false
138+
;
139+
entry:
140+
%addr0 = addrspacecast ptr addrspace(1) %addr to ptr
141+
%addr1 = getelementptr i8, ptr %addr0, i32 10
142+
%val = call i1 @llvm.nvvm.isspacep.shared.cluster(ptr %addr1)
143+
ret i1 %val
144+
}

0 commit comments

Comments
 (0)