Skip to content

Commit 3359797

Browse files
committed
address comments + fixup
1 parent 30be471 commit 3359797

File tree

5 files changed

+133
-414
lines changed

5 files changed

+133
-414
lines changed

llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -984,6 +984,9 @@ bool InferAddressSpacesImpl::updateAddressSpace(
984984
// of all its pointer operands.
985985
unsigned NewAS = UninitializedAddressSpace;
986986

987+
// isAddressExpression should guarantee that V is an operator or an argument.
988+
assert(isa<Operator>(V) || isa<Argument>(V));
989+
987990
if (isa<Operator>(V) &&
988991
cast<Operator>(V).getOpcode() == Instruction::Select) {
989992
const Operator &Op = cast<Operator>(V);
@@ -1277,7 +1280,7 @@ void InferAddressSpacesImpl::performPointerReplacement(
12771280
}
12781281

12791282
// Otherwise, replace the use with flat(NewV).
1280-
if (Instruction *VInst = dyn_cast<Instruction>(V)) {
1283+
if (isa<Instruction>(V) || isa<Instruction>(NewV)) {
12811284
// Don't create a copy of the original addrspacecast.
12821285
if (U == V && isa<AddrSpaceCastInst>(V))
12831286
return;
@@ -1287,14 +1290,14 @@ void InferAddressSpacesImpl::performPointerReplacement(
12871290
if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
12881291
InsertPos = std::next(NewVInst->getIterator());
12891292
else
1290-
InsertPos = std::next(VInst->getIterator());
1293+
InsertPos = std::next(cast<Instruction>(V)->getIterator());
12911294

12921295
while (isa<PHINode>(InsertPos))
12931296
++InsertPos;
12941297
// This instruction may contain multiple uses of V, update them all.
12951298
CurUser->replaceUsesOfWith(
12961299
V, new AddrSpaceCastInst(NewV, V->getType(), "", InsertPos));
1297-
} else if (isa<Constant>(V)) {
1300+
} else {
12981301
CurUserI->replaceUsesOfWith(
12991302
V, ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), V->getType()));
13001303
}

llvm/test/CodeGen/NVPTX/i1-ext-load.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@ define ptx_kernel void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
1212
; CHECK: .reg .b64 %rd<5>;
1313
; CHECK-EMPTY:
1414
; CHECK: ld.param.u64 %rd1, [foo_param_0];
15-
; CHECK: ld.param.u64 %rd2, [foo_param_1];
16-
; CHECK: cvta.to.global.u64 %rd3, %rd2;
17-
; CHECK: cvta.to.global.u64 %rd4, %rd1;
18-
; CHECK: ld.global.nc.u8 %rs1, [%rd4];
15+
; CHECK: cvta.to.global.u64 %rd2, %rd1;
16+
; CHECK: ld.param.u64 %rd3, [foo_param_1];
17+
; CHECK: cvta.to.global.u64 %rd4, %rd3;
18+
; CHECK: ld.global.nc.u8 %rs1, [%rd2];
1919
; CHECK: cvt.u32.u8 %r1, %rs1;
2020
; CHECK: add.s32 %r2, %r1, 1;
2121
; CHECK: and.b32 %r3, %r2, 1;
22-
; CHECK: st.global.u32 [%rd3], %r3;
22+
; CHECK: st.global.u32 [%rd4], %r3;
2323
; CHECK: ret;
2424
%ld = load i1, ptr %ptr, align 1
2525
%zext = zext i1 %ld to i32

llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll

Lines changed: 13 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
1212
; OPT-LABEL: define dso_local noundef i32 @non_kernel_function(
1313
; OPT-SAME: ptr noundef readonly byval([[STRUCT_UINT4:%.*]]) align 16 captures(none) [[A:%.*]], i1 noundef zeroext [[B:%.*]], i32 noundef [[C:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1414
; OPT-NEXT: [[ENTRY:.*:]]
15-
; OPT-NEXT: [[A2:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(5)
16-
; OPT-NEXT: [[A1:%.*]] = addrspacecast ptr addrspace(5) [[A2]] to ptr
17-
; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A1]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
15+
; OPT-NEXT: [[A_:%.*]] = select i1 [[B]], ptr [[A]], ptr addrspacecast (ptr addrspace(1) @gi to ptr)
1816
; OPT-NEXT: [[IDX_EXT:%.*]] = sext i32 [[C]] to i64
1917
; OPT-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[A_]], i64 [[IDX_EXT]]
2018
; OPT-NEXT: [[TMP0:%.*]] = load i32, ptr [[ADD_PTR]], align 1
@@ -74,12 +72,10 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
7472
; PTX-NEXT: ret;
7573
; OPT-LABEL: define ptx_kernel void @grid_const_int(
7674
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
77-
; OPT-NEXT: [[OUT2:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
78-
; OPT-NEXT: [[OUT3:%.*]] = addrspacecast ptr addrspace(1) [[OUT2]] to ptr
7975
; OPT-NEXT: [[INPUT11:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
8076
; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4
8177
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
82-
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
78+
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
8379
; OPT-NEXT: ret void
8480
%tmp = load i32, ptr %input1, align 4
8581
%add = add i32 %tmp, %input2
@@ -105,15 +101,13 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
105101
; PTX-NEXT: ret;
106102
; OPT-LABEL: define ptx_kernel void @grid_const_struct(
107103
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] {
108-
; OPT-NEXT: [[OUT4:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
109-
; OPT-NEXT: [[OUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUT4]] to ptr
110104
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
111105
; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0
112106
; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1
113107
; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4
114108
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4
115109
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
116-
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
110+
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4
117111
; OPT-NEXT: ret void
118112
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
119113
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -233,11 +227,9 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
233227
; PTX-NEXT: ret;
234228
; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape(
235229
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] {
236-
; OPT-NEXT: [[ADDR4:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(1)
237-
; OPT-NEXT: [[ADDR5:%.*]] = addrspacecast ptr addrspace(1) [[ADDR4]] to ptr
238230
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
239231
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
240-
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
232+
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8
241233
; OPT-NEXT: ret void
242234
store ptr %input, ptr %addr, align 8
243235
ret void
@@ -263,14 +255,12 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
263255
; PTX-NOT: .local
264256
; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape(
265257
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] {
266-
; OPT-NEXT: [[RESULT4:%.*]] = addrspacecast ptr [[RESULT]] to ptr addrspace(1)
267-
; OPT-NEXT: [[RESULT5:%.*]] = addrspacecast ptr addrspace(1) [[RESULT4]] to ptr
268258
; OPT-NEXT: [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
269259
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
270260
; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
271261
; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
272262
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
273-
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
263+
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8
274264
; OPT-NEXT: ret void
275265
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
276266
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -311,13 +301,11 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
311301
; PTX-NEXT: ret;
312302
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
313303
; OPT-SAME: ptr byval(i32) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
314-
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
315-
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
316304
; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
317305
; OPT-NEXT: [[INPUT1_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1]])
318306
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4
319307
; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]]
320-
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
308+
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4
321309
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
322310
; OPT-NEXT: ret void
323311
%val = load i32, ptr %input
@@ -361,15 +349,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
361349
; PTX-NEXT: ret;
362350
; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem(
363351
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] {
364-
; OPT-NEXT: [[OUTPUT4:%.*]] = addrspacecast ptr [[OUTPUT]] to ptr addrspace(1)
365-
; OPT-NEXT: [[OUTPUT5:%.*]] = addrspacecast ptr addrspace(1) [[OUTPUT4]] to ptr
366352
; OPT-NEXT: [[INPUT2:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
367353
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2]])
368354
; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0
369355
; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4
370356
; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1
371357
; OPT-NEXT: [[VAL2:%.*]] = load i32, ptr [[PTR2]], align 4
372-
; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT5]], align 8
358+
; OPT-NEXT: store ptr [[INPUT1]], ptr [[OUTPUT]], align 8
373359
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
374360
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
375361
; OPT-NEXT: ret i32 [[ADD]]
@@ -407,11 +393,9 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
407393
; PTX-NEXT: ret;
408394
; OPT-LABEL: define ptx_kernel void @grid_const_phi(
409395
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
410-
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
411-
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
412396
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
413397
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
414-
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
398+
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
415399
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
416400
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
417401
; OPT: [[FIRST]]:
@@ -423,7 +407,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
423407
; OPT: [[MERGE]]:
424408
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
425409
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
426-
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
410+
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
427411
; OPT-NEXT: ret void
428412

429413
%val = load i32, ptr %inout
@@ -470,13 +454,11 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
470454
; PTX-NEXT: ret;
471455
; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc(
472456
; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
473-
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
474-
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
475457
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
476458
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
477459
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
478460
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
479-
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
461+
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
480462
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
481463
; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]]
482464
; OPT: [[FIRST]]:
@@ -488,7 +470,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
488470
; OPT: [[MERGE]]:
489471
; OPT-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ]
490472
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
491-
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
473+
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
492474
; OPT-NEXT: ret void
493475
%val = load i32, ptr %inout
494476
%less = icmp slt i32 %val, 0
@@ -531,17 +513,15 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
531513
; PTX-NEXT: ret;
532514
; OPT-LABEL: define ptx_kernel void @grid_const_select(
533515
; OPT-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] {
534-
; OPT-NEXT: [[INOUT1:%.*]] = addrspacecast ptr [[INOUT]] to ptr addrspace(1)
535-
; OPT-NEXT: [[INOUT2:%.*]] = addrspacecast ptr addrspace(1) [[INOUT1]] to ptr
536516
; OPT-NEXT: [[INPUT2_PARAM:%.*]] = addrspacecast ptr [[INPUT2]] to ptr addrspace(101)
537517
; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT2_PARAM]])
538518
; OPT-NEXT: [[INPUT1_PARAM:%.*]] = addrspacecast ptr [[INPUT1]] to ptr addrspace(101)
539519
; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT1_PARAM]])
540-
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT2]], align 4
520+
; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4
541521
; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0
542522
; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]]
543523
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
544-
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
524+
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4
545525
; OPT-NEXT: ret void
546526
%val = load i32, ptr %inout
547527
%less = icmp slt i32 %val, 0

llvm/test/CodeGen/NVPTX/lower-args.ll

Lines changed: 10 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes IR,IRC
33
; RUN: opt < %s -S -nvptx-lower-args --mtriple nvptx64-nvidia-nvcl | FileCheck %s --check-prefixes IR,IRO
44
; RUN: llc < %s -mcpu=sm_20 --mtriple nvptx64-nvidia-cuda | FileCheck %s --check-prefixes PTX,PTXC
@@ -17,12 +17,10 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
1717
; IR-LABEL: define void @load_alignment(
1818
; IR-SAME: ptr readonly byval([[CLASS_OUTER:%.*]]) align 8 captures(none) [[ARG:%.*]]) {
1919
; IR-NEXT: [[ENTRY:.*:]]
20-
; IR-NEXT: [[ARG2:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(5)
21-
; IR-NEXT: [[ARG1:%.*]] = addrspacecast ptr addrspace(5) [[ARG2]] to ptr
22-
; IR-NEXT: [[ARG_IDX_VAL:%.*]] = load ptr, ptr [[ARG1]], align 8
23-
; IR-NEXT: [[ARG_IDX1:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG1]], i64 0, i32 0, i32 1
20+
; IR-NEXT: [[ARG_IDX_VAL:%.*]] = load ptr, ptr [[ARG]], align 8
21+
; IR-NEXT: [[ARG_IDX1:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG]], i64 0, i32 0, i32 1
2422
; IR-NEXT: [[ARG_IDX1_VAL:%.*]] = load ptr, ptr [[ARG_IDX1]], align 8
25-
; IR-NEXT: [[ARG_IDX2:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG1]], i64 0, i32 1
23+
; IR-NEXT: [[ARG_IDX2:%.*]] = getelementptr [[CLASS_OUTER]], ptr [[ARG]], i64 0, i32 1
2624
; IR-NEXT: [[ARG_IDX2_VAL:%.*]] = load i32, ptr [[ARG_IDX2]], align 8
2725
; IR-NEXT: [[ARG_IDX_VAL_VAL:%.*]] = load i32, ptr [[ARG_IDX_VAL]], align 4
2826
; IR-NEXT: [[ADD_I:%.*]] = add nsw i32 [[ARG_IDX_VAL_VAL]], [[ARG_IDX2_VAL]]
@@ -77,9 +75,7 @@ entry:
7775
define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
7876
; IR-LABEL: define void @load_padding(
7977
; IR-SAME: ptr readonly byval([[CLASS_PADDED:%.*]]) align 4 captures(none) [[ARG:%.*]]) {
80-
; IR-NEXT: [[ARG2:%.*]] = addrspacecast ptr [[ARG]] to ptr addrspace(5)
81-
; IR-NEXT: [[ARG1:%.*]] = addrspacecast ptr addrspace(5) [[ARG2]] to ptr
82-
; IR-NEXT: [[TMP:%.*]] = call ptr @escape(ptr nonnull align 16 [[ARG1]])
78+
; IR-NEXT: [[TMP:%.*]] = call ptr @escape(ptr nonnull align 16 [[ARG]])
8379
; IR-NEXT: ret void
8480
;
8581
; PTX-LABEL: load_padding(
@@ -108,21 +104,11 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
108104
; OpenCL can't make assumptions about incoming pointer, so we should generate
109105
; generic pointers load/store.
110106
define ptx_kernel void @ptr_generic(ptr %out, ptr %in) {
111-
; IRC-LABEL: define ptx_kernel void @ptr_generic(
112-
; IRC-SAME: ptr [[OUT:%.*]], ptr [[IN:%.*]]) {
113-
; IRC-NEXT: [[IN3:%.*]] = addrspacecast ptr [[IN]] to ptr addrspace(1)
114-
; IRC-NEXT: [[IN4:%.*]] = addrspacecast ptr addrspace(1) [[IN3]] to ptr
115-
; IRC-NEXT: [[OUT1:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1)
116-
; IRC-NEXT: [[OUT2:%.*]] = addrspacecast ptr addrspace(1) [[OUT1]] to ptr
117-
; IRC-NEXT: [[V:%.*]] = load i32, ptr [[IN4]], align 4
118-
; IRC-NEXT: store i32 [[V]], ptr [[OUT2]], align 4
119-
; IRC-NEXT: ret void
120-
;
121-
; IRO-LABEL: define ptx_kernel void @ptr_generic(
122-
; IRO-SAME: ptr [[OUT:%.*]], ptr [[IN:%.*]]) {
123-
; IRO-NEXT: [[V:%.*]] = load i32, ptr [[IN]], align 4
124-
; IRO-NEXT: store i32 [[V]], ptr [[OUT]], align 4
125-
; IRO-NEXT: ret void
107+
; IR-LABEL: define ptx_kernel void @ptr_generic(
108+
; IR-SAME: ptr [[OUT:%.*]], ptr [[IN:%.*]]) {
109+
; IR-NEXT: [[V:%.*]] = load i32, ptr [[IN]], align 4
110+
; IR-NEXT: store i32 [[V]], ptr [[OUT]], align 4
111+
; IR-NEXT: ret void
126112
;
127113
; PTXC-LABEL: ptr_generic(
128114
; PTXC: {

0 commit comments

Comments
 (0)