Skip to content

Commit c16b7e5

Browse files
committed
[NVPTX] Allow using v4i32 for memcpy lowering.
Differential Revision: https://reviews.llvm.org/D152317
1 parent 1d96e24 commit c16b7e5

File tree

2 files changed

+52
-0
lines changed

2 files changed

+52
-0
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -572,6 +572,17 @@ class NVPTXTargetLowering : public TargetLowering {
572572
// instruction, so we say that ctlz is cheap to speculate.
573573
bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
574574

575+
// Chooses the value type to use when lowering the memory operation described
// by Op (per the commit title, this enables v4i32 for memcpy lowering).
// Returns MVT::v4i32 when Op.size() is at least 16 and
// Op.isDstAligned(Align(16)) holds; otherwise returns MVT::Other.
// FuncAttributes is not consulted.
// NOTE(review): MVT::Other presumably leaves the choice to the generic
// lowering code -- confirm against the base-class contract.
// NOTE(review): only the destination alignment is tested here; confirm
// whether the source alignment of a copy should be taken into account too.
EVT getOptimalMemOpType(const MemOp &Op,
576+
const AttributeList &FuncAttributes) const override {
577+
return (Op.size() >= 16 && Op.isDstAligned(Align(16))) ? MVT::v4i32
578+
: MVT::Other;
579+
}
580+
581+
// Returns the type that VT should be transformed to.
// MVT::v4i32 is returned unchanged; every other type is forwarded to
// TargetLoweringBase::getTypeToTransformTo(Context, VT).
// NOTE(review): presumably this keeps the v4i32 type chosen for memcpy
// lowering from being rewritten into another type -- confirm the intent
// with the associated review (D152317).
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
582+
if (VT == MVT::v4i32)
583+
return VT;
584+
return TargetLoweringBase::getTypeToTransformTo(Context, VT);
585+
}
575586
// Always returns AtomicExpansionKind::None; the LoadInst argument LI is not
// inspected.
// NOTE(review): None presumably means that atomic loads are not cast in IR
// for this target -- confirm against the AtomicExpansionKind definition.
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
576587
return AtomicExpansionKind::None;
577588
}

llvm/test/CodeGen/NVPTX/intrinsics.ll

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,46 @@ define i64 @test_clock64() {
133133
ret i64 %ret
134134
}
135135

136+
%struct.S = type { [4 x i64] }
137+
138+
; CHECK-LABEL: test_memcpy
139+
; Copies 31 bytes from %src to %dst with both pointers marked align 16.
; Expected PTX: bytes 0-15 move as one ld.v4.u32 / st.v4.u32 pair, followed
; by a u64 pair at offset 16, a u32 pair at offset 24, a u16 pair at offset 28
; and a u8 pair at offset 30. The expectations are order-independent.
define dso_local void @test_memcpy(ptr noundef %dst, ptr noundef %src) #0 {
140+
; CHECK-DAG: ld.param.u{{32|64}} %[[D:(r|rd)[0-9]+]], [test_memcpy_param_0];
141+
; CHECK-DAG: ld.param.u{{32|64}} %[[S:(r|rd)[0-9]+]], [test_memcpy_param_1];
142+
; CHECK-DAG: ld.u8 %[[V30:rs[0-9]+]], [%[[S]]+30];
143+
; CHECK-DAG: st.u8 [%[[D]]+30], %[[V30]];
144+
; CHECK-DAG: ld.u16 %[[V28:rs[0-9]+]], [%[[S]]+28];
145+
; CHECK-DAG: st.u16 [%[[D]]+28], %[[V28]];
146+
; CHECK-DAG: ld.u32 %[[V24:r[0-9]+]], [%[[S]]+24];
147+
; CHECK-DAG: st.u32 [%[[D]]+24], %[[V24]];
148+
; CHECK-DAG: ld.u64 %[[V16:rd[0-9]+]], [%[[S]]+16];
149+
; CHECK-DAG: st.u64 [%[[D]]+16], %[[V16]];
150+
; CHECK-DAG: ld.v4.u32 {[[V0:%r[0-9]+, %r[0-9]+, %r[0-9]+, %r[0-9]+]]}, [%[[S]]];
151+
; CHECK-DAG: st.v4.u32 [%[[D]]], {[[V0]]};
152+
call void @llvm.memcpy.p0.p0.i64(ptr align 16 %dst, ptr align 16 %src, i64 31, i1 false)
153+
ret void
154+
}
155+
156+
; CHECK-LABEL: test_memcpy_a8
157+
; Copies 31 bytes from %src to %dst with both pointers marked align 8.
; Expected PTX: bytes 0-15 move as two u64 load/store pairs (offsets 0 and 8)
; rather than a v4.u32 pair, followed by a u64 pair at offset 16, a u32 pair
; at offset 24, a u16 pair at offset 28 and a u8 pair at offset 30.
define dso_local void @test_memcpy_a8(ptr noundef %dst, ptr noundef %src) #0 {
158+
; CHECK-DAG: ld.param.u{{32|64}} %[[D:(r|rd)[0-9]+]], [test_memcpy_a8_param_0];
159+
; CHECK-DAG: ld.param.u{{32|64}} %[[S:(r|rd)[0-9]+]], [test_memcpy_a8_param_1];
160+
; CHECK-DAG: ld.u8 %[[V30:rs[0-9]+]], [%[[S]]+30];
161+
; CHECK-DAG: st.u8 [%[[D]]+30], %[[V30]];
162+
; CHECK-DAG: ld.u16 %[[V28:rs[0-9]+]], [%[[S]]+28];
163+
; CHECK-DAG: st.u16 [%[[D]]+28], %[[V28]];
164+
; CHECK-DAG: ld.u32 %[[V24:r[0-9]+]], [%[[S]]+24];
165+
; CHECK-DAG: st.u32 [%[[D]]+24], %[[V24]];
166+
; CHECK-DAG: ld.u64 %[[V16:rd[0-9]+]], [%[[S]]+16];
167+
; CHECK-DAG: st.u64 [%[[D]]+16], %[[V16]];
168+
; CHECK-DAG: ld.u64 %[[V8:rd[0-9]+]], [%[[S]]+8];
169+
; CHECK-DAG: st.u64 [%[[D]]+8], %[[V8]];
170+
; CHECK-DAG: ld.u64 %[[V0:rd[0-9]+]], [%[[S]]];
171+
; CHECK-DAG: st.u64 [%[[D]]], %[[V0]];
172+
call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %src, i64 31, i1 false)
173+
ret void
174+
}
175+
136176
declare float @llvm.fabs.f32(float)
137177
declare double @llvm.fabs.f64(double)
138178
declare float @llvm.nvvm.sqrt.f(float)
@@ -142,6 +182,7 @@ declare i64 @llvm.bitreverse.i64(i64)
142182
declare i16 @llvm.ctpop.i16(i16)
143183
declare i32 @llvm.ctpop.i32(i32)
144184
declare i64 @llvm.ctpop.i64(i64)
185+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
145186

146187
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
147188
declare i32 @llvm.nvvm.read.ptx.sreg.clock()

0 commit comments

Comments
 (0)