Skip to content

Commit 32f9983

Browse files
authored
[AMDGPU] - Add address space for strided buffers (#74471)
This is an experimental address space for strided buffers. These buffers can have structs as elements and a stride > 1. These pointers allow the indexed access in units of stride, i.e., they point at `buffer[index * stride]`. Thus, we can use the `idxen` modifier for buffer loads. We assign address space 9 to 192-bit buffer pointers which contain a 128-bit descriptor, a 32-bit offset and a 32-bit index. Essentially, they are fat buffer pointers with an additional 32-bit index.
1 parent 163aeca commit 32f9983

File tree

68 files changed

+247
-120
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

68 files changed

+247
-120
lines changed

clang/lib/Basic/Targets/AMDGPU.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,10 @@ static const char *const DataLayoutStringR600 =
3232

3333
static const char *const DataLayoutStringAMDGCN =
3434
"e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
35-
"-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
35+
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:"
36+
"32-v48:64-v96:128"
3637
"-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
37-
"-ni:7:8";
38+
"-ni:7:8:9";
3839

3940
const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
4041
llvm::AMDGPUAS::FLAT_ADDRESS, // Default

clang/test/CodeGen/target-data.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -176,12 +176,12 @@
176176

177177
// RUN: %clang_cc1 -triple amdgcn-unknown -target-cpu hawaii -o - -emit-llvm %s \
178178
// RUN: | FileCheck %s -check-prefix=R600SI
179-
// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
179+
// R600SI: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
180180

181181
// Test default -target-cpu
182182
// RUN: %clang_cc1 -triple amdgcn-unknown -o - -emit-llvm %s \
183183
// RUN: | FileCheck %s -check-prefix=R600SIDefault
184-
// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
184+
// R600SIDefault: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
185185

186186
// RUN: %clang_cc1 -triple arm64-unknown -o - -emit-llvm %s | \
187187
// RUN: FileCheck %s -check-prefix=AARCH64
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
// RUN: %clang_cc1 %s -O0 -triple amdgcn -emit-llvm -o - | FileCheck %s
22
// RUN: %clang_cc1 %s -O0 -triple amdgcn---opencl -emit-llvm -o - | FileCheck %s
33

4-
// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
4+
// CHECK: target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
55
void foo(void) {}

llvm/docs/AMDGPUUsage.rst

Lines changed: 31 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -703,23 +703,24 @@ supported for the ``amdgcn`` target.
703703
.. table:: AMDGPU Address Spaces
704704
:name: amdgpu-address-spaces-table
705705

706-
================================= =============== =========== ================ ======= ============================
707-
.. 64-Bit Process Address Space
708-
--------------------------------- --------------- ----------- ---------------- ------------------------------------
709-
Address Space Name LLVM IR Address HSA Segment Hardware Address NULL Value
710-
Space Number Name Name Size
711-
================================= =============== =========== ================ ======= ============================
712-
Generic 0 flat flat 64 0x0000000000000000
713-
Global 1 global global 64 0x0000000000000000
714-
Region 2 N/A GDS 32 *not implemented for AMDHSA*
715-
Local 3 group LDS 32 0xFFFFFFFF
716-
Constant 4 constant *same as global* 64 0x0000000000000000
717-
Private 5 private scratch 32 0xFFFFFFFF
718-
Constant 32-bit 6 *TODO* 0x00000000
719-
Buffer Fat Pointer (experimental) 7 *TODO*
720-
Buffer Resource (experimental) 8 *TODO*
721-
Streamout Registers 128 N/A GS_REGS
722-
================================= =============== =========== ================ ======= ============================
706+
===================================== =============== =========== ================ ======= ============================
707+
.. 64-Bit Process Address Space
708+
------------------------------------- --------------- ----------- ---------------- ------------------------------------
709+
Address Space Name LLVM IR Address HSA Segment Hardware Address NULL Value
710+
Space Number Name Name Size
711+
===================================== =============== =========== ================ ======= ============================
712+
Generic 0 flat flat 64 0x0000000000000000
713+
Global 1 global global 64 0x0000000000000000
714+
Region 2 N/A GDS 32 *not implemented for AMDHSA*
715+
Local 3 group LDS 32 0xFFFFFFFF
716+
Constant 4 constant *same as global* 64 0x0000000000000000
717+
Private 5 private scratch 32 0xFFFFFFFF
718+
Constant 32-bit 6 *TODO* 0x00000000
719+
Buffer Fat Pointer (experimental) 7 *TODO*
720+
Buffer Resource (experimental) 8 *TODO*
721+
Buffer Strided Pointer (experimental) 9 *TODO*
722+
Streamout Registers 128 N/A GS_REGS
723+
===================================== =============== =========== ================ ======= ============================
723724

724725
**Generic**
725726
The generic address space is supported unless the *Target Properties* column
@@ -836,7 +837,7 @@ supported for the ``amdgcn`` target.
836837
the backend.
837838

838839
The buffer descriptor used to construct a buffer fat pointer must be *raw*:
839-
the stride must be 0, the "add tid" flag bust be 0, the swizzle enable bits
840+
the stride must be 0, the "add tid" flag must be 0, the swizzle enable bits
840841
must be off, and the extent must be measured in bytes. (On subtargets where
841842
bounds checking may be disabled, buffer fat pointers may choose to enable
842843
it or not).
@@ -864,6 +865,18 @@ supported for the ``amdgcn`` target.
864865
(bits `127:96`). The specific interpretation of these fields varies by the
865866
target architecture and is detailed in the ISA descriptions.
866867

868+
**Buffer Strided Pointer**
869+
The buffer strided pointer is an experimental address space. It represents
870+
a 128-bit buffer descriptor and a 32-bit offset, like the **Buffer Fat
871+
Pointer**. Additionally, it contains an index into the buffer, which
872+
allows the direct addressing of structured elements. These components appear
873+
in that order, i.e., the descriptor comes first, then the 32-bit offset
874+
followed by the 32-bit index.
875+
876+
The bits in the buffer descriptor must meet the following requirements:
877+
the stride is the size of a structured element, the "add tid" flag must be 0,
878+
and the swizzle enable bits must be off.
879+
867880
**Streamout Registers**
868881
Dedicated registers used by the GS NGG Streamout Instructions. The register
869882
file is modelled as a memory in a distinct address space because it is indexed

llvm/include/llvm/Support/AMDGPUAddrSpace.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ namespace llvm {
2525
namespace AMDGPUAS {
2626
enum : unsigned {
2727
// The maximum value for flat, generic, local, private, constant and region.
28-
MAX_AMDGPU_ADDRESS = 8,
28+
MAX_AMDGPU_ADDRESS = 9,
2929

3030
FLAT_ADDRESS = 0, ///< Address space for flat memory.
3131
GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
@@ -42,6 +42,9 @@ enum : unsigned {
4242

4343
BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources.
4444

45+
BUFFER_STRIDED_POINTER = 9, ///< Address space for 192-bit fat buffer
46+
///< pointers with an additional index.
47+
4548
/// Internal address spaces. Can be freely renumbered.
4649
STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers.
4750
/// end Internal address spaces.

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5207,17 +5207,21 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
52075207
// This goes before adding new address spaces to prevent incoherent string
52085208
// values.
52095209
if (!DL.contains("-ni") && !DL.starts_with("ni"))
5210-
Res.append("-ni:7:8");
5211-
// Update ni:7 to ni:7:8.
5210+
Res.append("-ni:7:8:9");
5211+
// Update ni:7 to ni:7:8:9.
52125212
if (DL.ends_with("ni:7"))
5213-
Res.append(":8");
5213+
Res.append(":8:9");
5214+
if (DL.ends_with("ni:7:8"))
5215+
Res.append(":9");
52145216

52155217
// Add sizing for address spaces 7, 8, and 9 (fat raw buffers, buffer
52165218
// resources, and strided buffers). An empty data layout has already been upgraded to G1 by now.
52175219
if (!DL.contains("-p7") && !DL.starts_with("p7"))
52185220
Res.append("-p7:160:256:256:32");
52195221
if (!DL.contains("-p8") && !DL.starts_with("p8"))
52205222
Res.append("-p8:128:128");
5223+
if (!DL.contains("-p9") && !DL.startswith("p9"))
5224+
Res.append("-p9:192:256:256:32");
52215225

52225226
return Res;
52235227
}

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -410,24 +410,25 @@ inline bool isExtendedGlobalAddrSpace(unsigned AS) {
410410
}
411411

412412
static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
413-
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range");
413+
static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
414414

415415
if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
416416
return true;
417417

418-
// This array is indexed by address space value enum elements 0 ... to 8
418+
// This array is indexed by address space value enum elements 0 ... to 9
419419
// clang-format off
420-
static const bool ASAliasRules[9][9] = {
421-
/* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */
422-
/* Flat */ {true, true, false, true, true, true, true, true, true},
423-
/* Global */ {true, true, false, false, true, false, true, true, true},
424-
/* Region */ {false, false, true, false, false, false, false, false, false},
425-
/* Group */ {true, false, false, true, false, false, false, false, false},
426-
/* Constant */ {true, true, false, false, false, false, true, true, true},
427-
/* Private */ {true, false, false, false, false, true, false, false, false},
428-
/* Constant 32-bit */ {true, true, false, false, true, false, false, true, true},
429-
/* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true},
430-
/* Buffer Resource */ {true, true, false, false, true, false, true, true, true},
420+
static const bool ASAliasRules[10][10] = {
421+
/* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
422+
/* Flat */ {true, true, false, true, true, true, true, true, true, true},
423+
/* Global */ {true, true, false, false, true, false, true, true, true, true},
424+
/* Region */ {false, false, true, false, false, false, false, false, false, false},
425+
/* Group */ {true, false, false, true, false, false, false, false, false, false},
426+
/* Constant */ {true, true, false, false, false, false, true, true, true, true},
427+
/* Private */ {true, false, false, false, false, true, false, false, false, false},
428+
/* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
429+
/* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
430+
/* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
431+
/* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
431432
};
432433
// clang-format on
433434

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
633633
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
634634
const LLT BufferFatPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_FAT_POINTER);
635635
const LLT RsrcPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_RESOURCE);
636+
const LLT BufferStridedPtr =
637+
GetAddrSpacePtr(AMDGPUAS::BUFFER_STRIDED_POINTER);
636638

637639
const LLT CodePtr = FlatPtr;
638640

@@ -1113,7 +1115,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
11131115
}
11141116

11151117
getActionDefinitionsBuilder(G_PTR_ADD)
1116-
.unsupportedFor({BufferFatPtr, RsrcPtr})
1118+
.unsupportedFor({BufferFatPtr, BufferStridedPtr, RsrcPtr})
11171119
.legalIf(all(isPointer(0), sameSize(0, 1)))
11181120
.scalarize(0)
11191121
.scalarSameSizeAs(1, 0);
@@ -1403,7 +1405,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
14031405
// The custom pointers (fat pointers, buffer resources) don't work with load
14041406
// and store at this level. Fat pointers should have been lowered to
14051407
// intrinsics before the translation to MIR.
1406-
Actions.unsupportedIf(typeInSet(1, {BufferFatPtr, RsrcPtr}));
1408+
Actions.unsupportedIf(
1409+
typeInSet(1, {BufferFatPtr, BufferStridedPtr, RsrcPtr}));
14071410

14081411
// Address space 8 pointers are handled by a 4xs32 load, bitcast, and
14091412
// ptrtoint. This is needed to account for the fact that we can't have i128

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -539,9 +539,10 @@ static StringRef computeDataLayout(const Triple &TT) {
539539
// space 8) which cannot be non-trivially accessed by LLVM memory operations
540540
// like getelementptr.
541541
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
542-
"-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:"
542+
"-p7:160:256:256:32-p8:128:128-p9:192:256:256:32-i64:64-v16:16-v24:32-"
543+
"v32:32-v48:64-v96:"
543544
"128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
544-
"G1-ni:7:8";
545+
"G1-ni:7:8:9";
545546
}
546547

547548
LLVM_READNONE

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,8 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
368368
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
369369
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
370370
AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER ||
371-
AddrSpace == AMDGPUAS::BUFFER_RESOURCE) {
371+
AddrSpace == AMDGPUAS::BUFFER_RESOURCE ||
372+
AddrSpace == AMDGPUAS::BUFFER_STRIDED_POINTER) {
372373
return 512;
373374
}
374375

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1046,12 +1046,20 @@ static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
10461046
MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
10471047
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
10481048
return MVT::v5i32;
1049+
if (AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
1050+
DL.getPointerSizeInBits(AS) == 192)
1051+
return MVT::v6i32;
10491052
return AMDGPUTargetLowering::getPointerTy(DL, AS);
10501053
}
10511054
/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
10521055
/// v8i32 when padding is added.
1056+
/// The in-memory representation of a p9 is {p8, i32, i32}, which is
1057+
/// also v8i32 with padding.
10531058
MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
1054-
if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
1059+
if ((AMDGPUAS::BUFFER_FAT_POINTER == AS &&
1060+
DL.getPointerSizeInBits(AS) == 160) ||
1061+
(AMDGPUAS::BUFFER_STRIDED_POINTER == AS &&
1062+
DL.getPointerSizeInBits(AS) == 192))
10551063
return MVT::v8i32;
10561064
return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
10571065
}
@@ -1418,7 +1426,8 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
14181426

14191427
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
14201428
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
1421-
AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE) {
1429+
AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE ||
1430+
AS == AMDGPUAS::BUFFER_STRIDED_POINTER) {
14221431
// If the offset isn't a multiple of 4, it probably isn't going to be
14231432
// correctly aligned.
14241433
// FIXME: Can we get the real alignment here?

llvm/test/CodeGen/AMDGPU/amdgpu-alias-analysis.ll

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,3 +248,73 @@ define void @test_8_5(ptr %p) {
248248
load i8, ptr addrspace(3) @shm
249249
ret void
250250
}
251+
252+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8* %p1
253+
define void @test_9_0(ptr addrspace(9) %p, ptr addrspace(0) %p1) {
254+
load i8, ptr addrspace(9) %p
255+
load i8, ptr addrspace(0) %p1
256+
ret void
257+
}
258+
259+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(1)* %p1
260+
define void @test_9_1(ptr addrspace(9) %p, ptr addrspace(1) %p1) {
261+
load i8, ptr addrspace(9) %p
262+
load i8, ptr addrspace(1) %p1
263+
ret void
264+
}
265+
266+
; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(2)* %p1
267+
define void @test_9_2(ptr addrspace(9) %p, ptr addrspace(2) %p1) {
268+
load i8, ptr addrspace(9) %p
269+
load i8, ptr addrspace(2) %p1
270+
ret void
271+
}
272+
273+
; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(3)* %p1
274+
define void @test_9_3(ptr addrspace(9) %p, ptr addrspace(3) %p1) {
275+
load i8, ptr addrspace(9) %p
276+
load i8, ptr addrspace(3) %p1
277+
ret void
278+
}
279+
280+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(4)* %p1
281+
define void @test_9_4(ptr addrspace(9) %p, ptr addrspace(4) %p1) {
282+
load i8, ptr addrspace(9) %p
283+
load i8, ptr addrspace(4) %p1
284+
ret void
285+
}
286+
287+
; CHECK: NoAlias: i8 addrspace(9)* %p, i8 addrspace(5)* %p1
288+
define void @test_9_5(ptr addrspace(9) %p, ptr addrspace(5) %p1) {
289+
load i8, ptr addrspace(9) %p
290+
load i8, ptr addrspace(5) %p1
291+
ret void
292+
}
293+
294+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(6)* %p1
295+
define void @test_9_6(ptr addrspace(9) %p, ptr addrspace(6) %p1) {
296+
load i8, ptr addrspace(9) %p
297+
load i8, ptr addrspace(6) %p1
298+
ret void
299+
}
300+
301+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(7)* %p1
302+
define void @test_9_7(ptr addrspace(9) %p, ptr addrspace(7) %p1) {
303+
load i8, ptr addrspace(9) %p
304+
load i8, ptr addrspace(7) %p1
305+
ret void
306+
}
307+
308+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(8)* %p1
309+
define void @test_9_8(ptr addrspace(9) %p, ptr addrspace(8) %p1) {
310+
load i8, ptr addrspace(9) %p
311+
load i8, ptr addrspace(8) %p1
312+
ret void
313+
}
314+
315+
; CHECK: MayAlias: i8 addrspace(9)* %p, i8 addrspace(9)* %p1
316+
define void @test_9_9(ptr addrspace(9) %p, ptr addrspace(9) %p1) {
317+
load i8, ptr addrspace(9) %p
318+
load i8, ptr addrspace(9) %p1
319+
ret void
320+
}

llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-ceil.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
22
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-simplifylib %s | FileCheck %s
33

4-
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
4+
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9"
55

66
declare float @_Z4ceilf(float)
77
declare <2 x float> @_Z4ceilDv2_f(<2 x float>)

0 commit comments

Comments
 (0)