Skip to content

Commit 6fc63ab

Browse files
authored
[AMDGPULowerBufferFatPointers] Simplify and fix GEP offset emission (#95115)
Use emitGEPOffset() to emit the GEP offset, since it already contains all the necessary logic. This also fixes the nuw flag being incorrectly set on the offset calculation: inbounds implies only nsw, not nuw.
1 parent 3b3b839 commit 6fc63ab

File tree

3 files changed

+61
-87
lines changed

3 files changed

+61
-87
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Lines changed: 9 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,7 @@
200200
#include "llvm/ADT/SetOperations.h"
201201
#include "llvm/ADT/SmallVector.h"
202202
#include "llvm/Analysis/ConstantFolding.h"
203+
#include "llvm/Analysis/Utils/Local.h"
203204
#include "llvm/CodeGen/TargetPassConfig.h"
204205
#include "llvm/IR/AttributeMask.h"
205206
#include "llvm/IR/Constants.h"
@@ -1393,50 +1394,17 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
13931394
IRB.SetInsertPoint(&GEP);
13941395

13951396
auto [Rsrc, Off] = getPtrParts(Ptr);
1396-
Type *OffTy = Off->getType();
13971397
const DataLayout &DL = GEP.getModule()->getDataLayout();
13981398
bool InBounds = GEP.isInBounds();
13991399

1400-
// In order to call collectOffset() and thus not have to reimplement it,
1401-
// we need the GEP's pointer operand to have ptr addrspace(7) type
1402-
GEP.setOperand(GEP.getPointerOperandIndex(),
1403-
PoisonValue::get(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER)));
1404-
MapVector<Value *, APInt> VariableOffs;
1405-
APInt ConstOffVal = APInt::getZero(BufferOffsetWidth);
1406-
if (!GEP.collectOffset(DL, BufferOffsetWidth, VariableOffs, ConstOffVal))
1407-
report_fatal_error("Scalable vector or unsized struct in fat pointer GEP");
1408-
GEP.setOperand(GEP.getPointerOperandIndex(), Ptr);
1409-
Value *OffAccum = nullptr;
1410-
// Accumulate offsets together before adding to the base in order to preserve
1411-
// as many of the inbounds properties as possible.
1412-
for (auto [Arg, Multiple] : VariableOffs) {
1413-
if (auto *OffVecTy = dyn_cast<VectorType>(OffTy))
1414-
if (!Arg->getType()->isVectorTy())
1415-
Arg = IRB.CreateVectorSplat(OffVecTy->getElementCount(), Arg);
1416-
Arg = IRB.CreateIntCast(Arg, OffTy, /*isSigned=*/true);
1417-
if (!Multiple.isOne()) {
1418-
if (Multiple.isPowerOf2())
1419-
Arg = IRB.CreateShl(Arg, Multiple.logBase2(), "", /*hasNUW=*/InBounds,
1420-
/*HasNSW=*/InBounds);
1421-
else
1422-
Arg = IRB.CreateMul(Arg, ConstantExpr::getIntegerValue(OffTy, Multiple),
1423-
"", /*hasNUW=*/InBounds, /*hasNSW=*/InBounds);
1424-
}
1425-
if (OffAccum)
1426-
OffAccum = IRB.CreateAdd(OffAccum, Arg, "", /*hasNUW=*/InBounds,
1427-
/*hasNSW=*/InBounds);
1428-
else
1429-
OffAccum = Arg;
1430-
}
1431-
if (!ConstOffVal.isZero()) {
1432-
Constant *ConstOff = ConstantExpr::getIntegerValue(OffTy, ConstOffVal);
1433-
if (OffAccum)
1434-
OffAccum = IRB.CreateAdd(OffAccum, ConstOff, "", /*hasNUW=*/InBounds,
1435-
/*hasNSW=*/InBounds);
1436-
else
1437-
OffAccum = ConstOff;
1438-
}
1439-
1400+
// In order to call emitGEPOffset() and thus not have to reimplement it,
1401+
// we need the GEP result to have ptr addrspace(7) type.
1402+
Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
1403+
if (auto *VT = dyn_cast<VectorType>(Off->getType()))
1404+
FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
1405+
GEP.mutateType(FatPtrTy);
1406+
Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
1407+
GEP.mutateType(Ptr->getType());
14401408
if (!OffAccum) { // Constant-zero offset
14411409
SplitUsers.insert(&GEP);
14421410
return {Rsrc, Off};

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll

Lines changed: 40 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,13 @@ define ptr addrspace(7) @gep(ptr addrspace(7) %in, i32 %idx) {
1010
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[IN:%.*]], i32 [[IDX:%.*]]) #[[ATTR0:[0-9]+]] {
1111
; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 0
1212
; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 1
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw i32 [[IDX]], 40
14-
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 32
15-
; CHECK-NEXT: [[RET:%.*]] = add i32 [[IN_OFF]], [[TMP2]]
16-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[IN_RSRC]], 0
17-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP3]], i32 [[RET]], 1
18-
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP4]]
13+
; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw i32 [[IDX]], 40
14+
; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw i32 [[RET_IDX]], 8
15+
; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw i32 [[RET_OFFS]], 24
16+
; CHECK-NEXT: [[RET:%.*]] = add i32 [[IN_OFF]], [[RET_OFFS1]]
17+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[IN_RSRC]], 0
18+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1
19+
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]
1920
;
2021
%ret = getelementptr inbounds {i32, [4 x ptr]}, ptr addrspace(7) %in, i32 %idx, i32 1, i32 3
2122
ret ptr addrspace(7) %ret
@@ -26,12 +27,13 @@ define <2 x ptr addrspace(7)> @gep_vectors(<2 x ptr addrspace(7)> %in, <2 x i32>
2627
; CHECK-SAME: ({ <2 x ptr addrspace(8)>, <2 x i32> } [[IN:%.*]], <2 x i32> [[IDX:%.*]]) #[[ATTR0]] {
2728
; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 0
2829
; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 1
29-
; CHECK-NEXT: [[TMP1:%.*]] = mul nuw nsw <2 x i32> [[IDX]], <i32 40, i32 40>
30-
; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw <2 x i32> [[TMP1]], <i32 32, i32 32>
31-
; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[TMP2]]
32-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
33-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP3]], <2 x i32> [[RET]], 1
34-
; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP4]]
30+
; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[IDX]], <i32 40, i32 40>
31+
; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], <i32 8, i32 8>
32+
; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], <i32 24, i32 24>
33+
; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]]
34+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
35+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
36+
; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]]
3537
;
3638
%ret = getelementptr inbounds {i32, [4 x ptr]}, <2 x ptr addrspace(7)> %in, <2 x i32> %idx, i32 1, i32 3
3739
ret <2 x ptr addrspace(7)> %ret
@@ -44,13 +46,14 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64
4446
; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], 1
4547
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[IDX]], i64 0
4648
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
47-
; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[DOTSPLAT]] to <2 x i32>
48-
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw nsw <2 x i32> [[TMP1]], <i32 40, i32 40>
49-
; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw <2 x i32> [[TMP2]], <i32 32, i32 32>
50-
; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[TMP3]]
51-
; CHECK-NEXT: [[TMP4:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
52-
; CHECK-NEXT: [[TMP5:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP4]], <2 x i32> [[RET]], 1
53-
; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP5]]
49+
; CHECK-NEXT: [[DOTSPLAT_C:%.*]] = trunc <2 x i64> [[DOTSPLAT]] to <2 x i32>
50+
; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw <2 x i32> [[DOTSPLAT_C]], <i32 40, i32 40>
51+
; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], <i32 8, i32 8>
52+
; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], <i32 24, i32 24>
53+
; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]]
54+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0
55+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
56+
; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]]
5457
;
5558
%ret = getelementptr inbounds {i32, [4 x ptr]}, <2 x ptr addrspace(7)> %in, i64 %idx, i32 1, i32 3
5659
ret <2 x ptr addrspace(7)> %ret
@@ -61,11 +64,11 @@ define ptr addrspace(7) @simple_gep(ptr addrspace(7) %ptr, i32 %off) {
6164
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]], i32 [[OFF:%.*]]) #[[ATTR0]] {
6265
; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0
6366
; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1
64-
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[OFF]], 2
65-
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[TMP1]]
66-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0
67-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP2]], i32 [[RET]], 1
68-
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP3]]
67+
; CHECK-NEXT: [[RET_IDX:%.*]] = mul i32 [[OFF]], 4
68+
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[RET_IDX]]
69+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0
70+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1
71+
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]
6972
;
7073
%ret = getelementptr i32, ptr addrspace(7) %ptr, i32 %off
7174
ret ptr addrspace(7) %ret
@@ -76,11 +79,11 @@ define ptr addrspace(7) @simple_inbounds_gep(ptr addrspace(7) %ptr, i32 %off) {
7679
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]], i32 [[OFF:%.*]]) #[[ATTR0]] {
7780
; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0
7881
; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1
79-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i32 [[OFF]], 2
80-
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[TMP1]]
81-
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0
82-
; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP2]], i32 [[RET]], 1
83-
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP3]]
82+
; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw i32 [[OFF]], 4
83+
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[RET_IDX]]
84+
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0
85+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1
86+
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]
8487
;
8588
%ret = getelementptr inbounds i32, ptr addrspace(7) %ptr, i32 %off
8689
ret ptr addrspace(7) %ret
@@ -91,9 +94,10 @@ define ptr addrspace(7) @zero_gep(ptr addrspace(7) %ptr) {
9194
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] {
9295
; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0
9396
; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1
97+
; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], 0
9498
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0
95-
; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[PTR_OFF]], 1
96-
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]]
99+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1
100+
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]
97101
;
98102
%ret = getelementptr i8, ptr addrspace(7) %ptr, i32 0
99103
ret ptr addrspace(7) %ret
@@ -105,9 +109,10 @@ define ptr addrspace(7) @zero_gep_goes_second(ptr addrspace(7) %v0, i32 %arg) {
105109
; CHECK-NEXT: [[V0_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 0
106110
; CHECK-NEXT: [[V0_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 1
107111
; CHECK-NEXT: [[V1:%.*]] = add i32 [[V0_OFF]], [[ARG]]
112+
; CHECK-NEXT: [[V2:%.*]] = add i32 [[V1]], 0
108113
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[V0_RSRC]], 0
109-
; CHECK-NEXT: [[V2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[V1]], 1
110-
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[V2]]
114+
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[V2]], 1
115+
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]
111116
;
112117
%v1 = getelementptr i8, ptr addrspace(7) %v0, i32 %arg
113118
%v2 = getelementptr i8, ptr addrspace(7) %v1, i32 0
@@ -119,7 +124,8 @@ define ptr addrspace(7) @zero_gep_goes_first(ptr addrspace(7) %v0, i32 %arg) {
119124
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[V0:%.*]], i32 [[ARG:%.*]]) #[[ATTR0]] {
120125
; CHECK-NEXT: [[V0_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 0
121126
; CHECK-NEXT: [[V0_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 1
122-
; CHECK-NEXT: [[V2:%.*]] = add i32 [[V0_OFF]], [[ARG]]
127+
; CHECK-NEXT: [[V1:%.*]] = add i32 [[V0_OFF]], 0
128+
; CHECK-NEXT: [[V2:%.*]] = add i32 [[V1]], [[ARG]]
123129
; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[V0_RSRC]], 0
124130
; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[V2]], 1
125131
; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]]

llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -28,32 +28,32 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace
2828
; CHECK-NEXT: [[BUF_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP4]] to ptr addrspace(8), !dbg [[DBG27]]
2929
; CHECK-NEXT: [[BUF_PTR_2_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_2]] to i32, !dbg [[DBG27]]
3030
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27]]
31-
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[IDX]], 2, !dbg [[DBG28:![0-9]+]]
32-
; CHECK-NEXT: [[BUF_PTR_3:%.*]] = add i32 [[BUF_PTR_2_PTR_OFF]], [[TMP5]], !dbg [[DBG28]]
31+
; CHECK-NEXT: [[BUF_PTR_3_IDX:%.*]] = mul i32 [[IDX]], 4, !dbg [[DBG28:![0-9]+]]
32+
; CHECK-NEXT: [[BUF_PTR_3:%.*]] = add i32 [[BUF_PTR_2_PTR_OFF]], [[BUF_PTR_3_IDX]], !dbg [[DBG28]]
3333
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]]
3434
; CHECK-NEXT: [[BUF_PTR_3_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_2_PTR_RSRC]] to i160, !dbg [[DBG29:![0-9]+]]
35-
; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg [[DBG29]]
35+
; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg [[DBG29]]
3636
; CHECK-NEXT: [[BUF_PTR_3_INT_OFF:%.*]] = zext i32 [[BUF_PTR_3]] to i160, !dbg [[DBG29]]
37-
; CHECK-NEXT: [[BUF_PTR_3_INT:%.*]] = or i160 [[TMP6]], [[BUF_PTR_3_INT_OFF]], !dbg [[DBG29]]
37+
; CHECK-NEXT: [[BUF_PTR_3_INT:%.*]] = or i160 [[TMP5]], [[BUF_PTR_3_INT_OFF]], !dbg [[DBG29]]
3838
; CHECK-NEXT: store i160 [[BUF_PTR_3_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG29]]
3939
; CHECK-NEXT: [[BUF_PTR_4:%.*]] = load i160, ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG30:![0-9]+]]
40-
; CHECK-NEXT: [[TMP7:%.*]] = lshr i160 [[BUF_PTR_4]], 32, !dbg [[DBG30]]
41-
; CHECK-NEXT: [[TMP8:%.*]] = trunc i160 [[TMP7]] to i128, !dbg [[DBG30]]
42-
; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP8]] to ptr addrspace(8), !dbg [[DBG30]]
40+
; CHECK-NEXT: [[TMP6:%.*]] = lshr i160 [[BUF_PTR_4]], 32, !dbg [[DBG30]]
41+
; CHECK-NEXT: [[TMP7:%.*]] = trunc i160 [[TMP6]] to i128, !dbg [[DBG30]]
42+
; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP7]] to ptr addrspace(8), !dbg [[DBG30]]
4343
; CHECK-NEXT: [[BUF_PTR_4_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_4]] to i32, !dbg [[DBG30]]
4444
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]]
4545
; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF_PTR_4_PTR_RSRC]], i32 [[BUF_PTR_4_PTR_OFF]], i32 0, i32 0), !dbg [[DBG31:![0-9]+]]
4646
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata float [[RET]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]]
4747
; CHECK-NEXT: [[AUX_PTR_2:%.*]] = load i160, ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG32:![0-9]+]]
48-
; CHECK-NEXT: [[TMP9:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]]
49-
; CHECK-NEXT: [[TMP10:%.*]] = trunc i160 [[TMP9]] to i128, !dbg [[DBG32]]
50-
; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP10]] to ptr addrspace(8), !dbg [[DBG32]]
48+
; CHECK-NEXT: [[TMP8:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]]
49+
; CHECK-NEXT: [[TMP9:%.*]] = trunc i160 [[TMP8]] to i128, !dbg [[DBG32]]
50+
; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP9]] to ptr addrspace(8), !dbg [[DBG32]]
5151
; CHECK-NEXT: [[AUX_PTR_2_PTR_OFF:%.*]] = trunc i160 [[AUX_PTR_2]] to i32, !dbg [[DBG32]]
5252
; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]]
5353
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_4_PTR_RSRC]] to i160, !dbg [[DBG33:![0-9]+]]
54-
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]]
54+
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]]
5555
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_OFF:%.*]] = zext i32 [[BUF_PTR_4_PTR_OFF]] to i160, !dbg [[DBG33]]
56-
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT:%.*]] = or i160 [[TMP11]], [[BUF_PTR_4_PTR_INT_OFF]], !dbg [[DBG33]]
56+
; CHECK-NEXT: [[BUF_PTR_4_PTR_INT:%.*]] = or i160 [[TMP10]], [[BUF_PTR_4_PTR_INT_OFF]], !dbg [[DBG33]]
5757
; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i160(i160 [[BUF_PTR_4_PTR_INT]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]]
5858
; CHECK-NEXT: ret float [[RET]], !dbg [[DBG34:![0-9]+]]
5959
;

0 commit comments

Comments
 (0)