Skip to content

Commit 5955b3a

Browse files
committed
InferAddressSpaces: Handle prefetch intrinsic
1 parent a2ad627 commit 5955b3a

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,13 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
401 401
II->setCalledFunction(NewDecl);
402 402
return true;
403 403
}
404+
case Intrinsic::prefetch: {
405+
Function *NewDecl =
406+
Intrinsic::getDeclaration(M, II->getIntrinsicID(), {NewV->getType()});
407+
II->setArgOperand(0, NewV);
408+
II->setCalledFunction(NewDecl);
409+
return true;
410+
}
404 411
default: {
405 412
Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
406 413
if (!Rewrite)
@@ -423,6 +430,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
423 430
PostorderStack, Visited);
424 431
break;
425 432
case Intrinsic::masked_gather:
433+
case Intrinsic::prefetch:
426 434
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
427 435
PostorderStack, Visited);
428 436
break;
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=infer-address-spaces %s | FileCheck %s
3+
4+
; Prefetch through a flat pointer that was addrspacecast from addrspace(3).
; The checks expect the call to be rewritten to @llvm.prefetch.p3 operating on
; %group.ptr directly, with no addrspacecast remaining before it.
define void @prefetch_shared_to_flat(ptr addrspace(3) %group.ptr) {
5+
; CHECK-LABEL: define void @prefetch_shared_to_flat(
6+
; CHECK-SAME: ptr addrspace(3) [[GROUP_PTR:%.*]]) {
7+
; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[GROUP_PTR]], i32 0, i32 0, i32 1)
8+
; CHECK-NEXT: ret void
9+
;
10+
%cast = addrspacecast ptr addrspace(3) %group.ptr to ptr
11+
tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
12+
ret void
13+
}
14+
15+
; Prefetch through a flat pointer that was addrspacecast from addrspace(1).
; The checks expect the call to be rewritten to @llvm.prefetch.p1 operating on
; %global.ptr directly, with no addrspacecast remaining before it.
; The input call spells its pointer operand as `ptr addrspace(0)` where the
; sibling tests write plain `ptr`.
; NOTE(review): presumably the same type, since 0 is the default address
; space -- confirm against the LangRef.
define void @prefetch_global_to_flat(ptr addrspace(1) %global.ptr) {
16+
; CHECK-LABEL: define void @prefetch_global_to_flat(
17+
; CHECK-SAME: ptr addrspace(1) [[GLOBAL_PTR:%.*]]) {
18+
; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[GLOBAL_PTR]], i32 0, i32 0, i32 1)
19+
; CHECK-NEXT: ret void
20+
;
21+
%cast = addrspacecast ptr addrspace(1) %global.ptr to ptr
22+
tail call void @llvm.prefetch.p0(ptr addrspace(0) %cast, i32 0, i32 0, i32 1)
23+
ret void
24+
}
25+
26+
; Prefetch through a flat pointer that was addrspacecast from addrspace(4).
; The checks expect the call to be rewritten to @llvm.prefetch.p4 operating on
; %const.ptr directly, with no addrspacecast remaining before it.
define void @prefetch_constant_to_flat(ptr addrspace(4) %const.ptr) {
27+
; CHECK-LABEL: define void @prefetch_constant_to_flat(
28+
; CHECK-SAME: ptr addrspace(4) [[CONST_PTR:%.*]]) {
29+
; CHECK-NEXT: tail call void @llvm.prefetch.p4(ptr addrspace(4) [[CONST_PTR]], i32 0, i32 0, i32 1)
30+
; CHECK-NEXT: ret void
31+
;
32+
%cast = addrspacecast ptr addrspace(4) %const.ptr to ptr
33+
tail call void @llvm.prefetch.p0(ptr %cast, i32 0, i32 0, i32 1)
34+
ret void
35+
}
36+
37+
; Opposite direction: the flat argument is cast to addrspace(3) and the input
; already calls the .p3 overload on the cast result. The checks expect the
; addrspacecast and the @llvm.prefetch.p3 call to be emitted as written.
define void @prefetch_flat_to_shared(ptr %flat.ptr) {
38+
; CHECK-LABEL: define void @prefetch_flat_to_shared(
39+
; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
40+
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
41+
; CHECK-NEXT: tail call void @llvm.prefetch.p3(ptr addrspace(3) [[CAST]], i32 0, i32 0, i32 1)
42+
; CHECK-NEXT: ret void
43+
;
44+
%cast = addrspacecast ptr %flat.ptr to ptr addrspace(3)
45+
tail call void @llvm.prefetch.p3(ptr addrspace(3) %cast, i32 0, i32 0, i32 1)
46+
ret void
47+
}
48+
49+
; Opposite direction: the flat argument is cast to addrspace(1) and the input
; already calls the .p1 overload on the cast result. The checks expect the
; addrspacecast and the @llvm.prefetch.p1 call to be emitted as written.
define void @prefetch_flat_to_global(ptr %flat.ptr) {
50+
; CHECK-LABEL: define void @prefetch_flat_to_global(
51+
; CHECK-SAME: ptr [[FLAT_PTR:%.*]]) {
52+
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(1)
53+
; CHECK-NEXT: tail call void @llvm.prefetch.p1(ptr addrspace(1) [[CAST]], i32 0, i32 0, i32 1)
54+
; CHECK-NEXT: ret void
55+
;
56+
%cast = addrspacecast ptr %flat.ptr to ptr addrspace(1)
57+
tail call void @llvm.prefetch.p1(ptr addrspace(1) %cast, i32 0, i32 0, i32 1)
58+
ret void
59+
}

0 commit comments

Comments
 (0)