Skip to content

Commit 2f3ce34

Browse files
AlexMaclean authored and yuxuanchen1997 committed
[NVPTX] enforce signed 32 bit type for immediate offset (#99682)
Summary: The NVPTX ISA states that an immOff must fit in a signed 32-bit integer (https://docs.nvidia.com/cuda/parallel-thread-execution/#addresses-as-operands):

> `[reg+immOff]`
>
> a sum of register `reg` containing a byte address plus a constant integer byte offset (signed, 32-bit).
>
> `[var+immOff]`
>
> a sum of address of addressable variable `var` containing a byte address plus a constant integer byte offset (signed, 32-bit).

Currently we do not consider this constraint, meaning that in some edge cases we generate invalid PTX when a value is offset by a very large immediate.

Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60251391
1 parent 65d79a9 commit 2f3ce34

File tree

4 files changed

+122
-4
lines changed

4 files changed

+122
-4
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3752,8 +3752,14 @@ bool NVPTXDAGToDAGISel::SelectADDRri_imp(
37523752
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
37533753
else
37543754
Base = Addr.getOperand(0);
3755-
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3756-
mvt);
3755+
3756+
// Offset must fit in a 32-bit signed int in PTX [register+offset] address
3757+
// mode
3758+
if (!CN->getAPIntValue().isSignedIntN(32))
3759+
return false;
3760+
3761+
Offset = CurDAG->getTargetConstant(CN->getSExtValue(), SDLoc(OpNode),
3762+
MVT::i32);
37573763
return true;
37583764
}
37593765
}

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5167,9 +5167,12 @@ bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
51675167
// - [areg+immoff]
51685168
// - [immAddr]
51695169

5170-
if (AM.BaseGV) {
5170+
// immoff must fit in a signed 32-bit int
5171+
if (!APInt(64, AM.BaseOffs).isSignedIntN(32))
5172+
return false;
5173+
5174+
if (AM.BaseGV)
51715175
return !AM.BaseOffs && !AM.HasBaseReg && !AM.Scale;
5172-
}
51735176

51745177
switch (AM.Scale) {
51755178
case 0: // "r", "r+i" or "i" is allowed

llvm/test/CodeGen/NVPTX/addr-mode.ll

Lines changed: 85 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,85 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -march=nvptx64 | FileCheck %s
3+
4+
target triple = "nvptx64-nvidia-cuda"
5+
6+
define i32 @test_addr_mode_i64(ptr %x) {
7+
; CHECK-LABEL: test_addr_mode_i64(
8+
; CHECK: {
9+
; CHECK-NEXT: .reg .b32 %r<2>;
10+
; CHECK-NEXT: .reg .b64 %rd<2>;
11+
; CHECK-EMPTY:
12+
; CHECK-NEXT: // %bb.0:
13+
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_param_0];
14+
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
15+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
16+
; CHECK-NEXT: ret;
17+
%addr = getelementptr i32, ptr %x, i64 -1
18+
%res = load i32, ptr %addr
19+
ret i32 %res
20+
}
21+
22+
define i32 @test_addr_mode_i32(ptr %x) {
23+
; CHECK-LABEL: test_addr_mode_i32(
24+
; CHECK: {
25+
; CHECK-NEXT: .reg .b32 %r<2>;
26+
; CHECK-NEXT: .reg .b64 %rd<2>;
27+
; CHECK-EMPTY:
28+
; CHECK-NEXT: // %bb.0:
29+
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i32_param_0];
30+
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
31+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
32+
; CHECK-NEXT: ret;
33+
%addr = getelementptr i32, ptr %x, i32 -1
34+
%res = load i32, ptr %addr
35+
ret i32 %res
36+
}
37+
38+
define i32 @test_addr_mode_i16(ptr %x) {
39+
; CHECK-LABEL: test_addr_mode_i16(
40+
; CHECK: {
41+
; CHECK-NEXT: .reg .b32 %r<2>;
42+
; CHECK-NEXT: .reg .b64 %rd<2>;
43+
; CHECK-EMPTY:
44+
; CHECK-NEXT: // %bb.0:
45+
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i16_param_0];
46+
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
47+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
48+
; CHECK-NEXT: ret;
49+
%addr = getelementptr i32, ptr %x, i16 -1
50+
%res = load i32, ptr %addr
51+
ret i32 %res
52+
}
53+
54+
define i32 @test_addr_mode_i8(ptr %x) {
55+
; CHECK-LABEL: test_addr_mode_i8(
56+
; CHECK: {
57+
; CHECK-NEXT: .reg .b32 %r<2>;
58+
; CHECK-NEXT: .reg .b64 %rd<2>;
59+
; CHECK-EMPTY:
60+
; CHECK-NEXT: // %bb.0:
61+
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i8_param_0];
62+
; CHECK-NEXT: ld.u32 %r1, [%rd1+-4];
63+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
64+
; CHECK-NEXT: ret;
65+
%addr = getelementptr i32, ptr %x, i8 -1
66+
%res = load i32, ptr %addr
67+
ret i32 %res
68+
}
69+
70+
define i32 @test_addr_mode_i64_large(ptr %x) {
71+
; CHECK-LABEL: test_addr_mode_i64_large(
72+
; CHECK: {
73+
; CHECK-NEXT: .reg .b32 %r<2>;
74+
; CHECK-NEXT: .reg .b64 %rd<3>;
75+
; CHECK-EMPTY:
76+
; CHECK-NEXT: // %bb.0:
77+
; CHECK-NEXT: ld.param.u64 %rd1, [test_addr_mode_i64_large_param_0];
78+
; CHECK-NEXT: add.s64 %rd2, %rd1, 17179869172;
79+
; CHECK-NEXT: ld.u32 %r1, [%rd2];
80+
; CHECK-NEXT: st.param.b32 [func_retval0+0], %r1;
81+
; CHECK-NEXT: ret;
82+
%addr = getelementptr i32, ptr %x, i64 4294967293
83+
%res = load i32, ptr %addr
84+
ret i32 %res
85+
}

llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep.ll

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -397,3 +397,27 @@ entry:
397397
%ptr2 = getelementptr inbounds %struct0, ptr %ptr, i65 1, i32 3, i64 %idx, i32 1
398398
ret ptr %ptr2
399399
}
400+
401+
; Do not extract large constant offset that cannot be folded in to PTX
402+
; addressing mode
403+
define void @large_offset(ptr %out, i32 %in) {
404+
; CHECK-LABEL: define void @large_offset(
405+
; CHECK-SAME: ptr [[OUT:%.*]], i32 [[IN:%.*]]) {
406+
; CHECK-NEXT: entry:
407+
; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
408+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[TMP0]], 536870912
409+
; CHECK-NEXT: [[IDX:%.*]] = zext nneg i32 [[ADD]] to i64
410+
; CHECK-NEXT: [[GETELEM:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDX]]
411+
; CHECK-NEXT: store i32 [[IN]], ptr [[GETELEM]], align 4
412+
; CHECK-NEXT: ret void
413+
;
414+
entry:
415+
%0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
416+
%add = add nuw nsw i32 %0, 536870912
417+
%idx = zext nneg i32 %add to i64
418+
%getElem = getelementptr inbounds i32, ptr %out, i64 %idx
419+
store i32 %in, ptr %getElem, align 4
420+
ret void
421+
}
422+
423+
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()

0 commit comments

Comments
 (0)