
Commit 67a2904

frasercrmck authored and tstellar committed
[VectorCombine] Insert addrspacecast when crossing address space boundaries
We cannot bitcast pointers across different address spaces. This was previously fixed in D89577, but D93229 then added an enhancement that peeks further through the pointer operand, opening up the possibility that address-space violations could be introduced. Instead of bailing out as the previous fix did, simply insert an addrspacecast instruction.

Reviewed By: lebedev.ri

Differential Revision: https://reviews.llvm.org/D121787

(cherry picked from commit 2e44b78)
1 parent 3530682 commit 67a2904
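The fix leans on IRBuilder::CreatePointerBitCastOrAddrSpaceCast rather than open-coding the address-space check. As a rough sketch (a hypothetical helper for illustration, not the LLVM source), the choice that call makes boils down to comparing the source and destination address spaces:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical sketch of what CreatePointerBitCastOrAddrSpaceCast decides:
// emit an addrspacecast when the cast crosses an address-space boundary,
// and a plain bitcast otherwise.
static Value *castPointer(IRBuilder<> &Builder, Value *SrcPtr,
                          PointerType *DstTy) {
  if (SrcPtr->getType()->getPointerAddressSpace() != DstTy->getAddressSpace())
    return Builder.CreateAddrSpaceCast(SrcPtr, DstTy); // crossing ASes
  return Builder.CreateBitCast(SrcPtr, DstTy);         // same AS: bitcast ok
}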

4 files changed: +21 lines, −12 lines

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 2 additions & 6 deletions
@@ -152,12 +152,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
   assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
 
-  // If original AS != Load's AS, we can't bitcast the original pointer and have
-  // to use Load's operand instead. Ideally we would want to strip pointer casts
-  // without changing AS, but there's no API to do that ATM.
   unsigned AS = Load->getPointerAddressSpace();
-  if (AS != SrcPtr->getType()->getPointerAddressSpace())
-    SrcPtr = Load->getPointerOperand();
 
   // We are potentially transforming byte-sized (8-bit) memory accesses, so make
   // sure we have all of our type-based constraints in place for this target.
@@ -245,7 +240,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // It is safe and potentially profitable to load a vector directly:
   // inselt undef, load Scalar, 0 --> load VecPtr
   IRBuilder<> Builder(Load);
-  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
+  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+      SrcPtr, MinVecTy->getPointerTo(AS));
   Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
   VecLd = Builder.CreateShuffleVector(VecLd, Mask);
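For illustration, a self-contained snippet (an assumption-laden sketch written for this note, not part of the commit) that exercises the new call site's behavior: casting an addrspace(5) alloca to a vector pointer in another address space, as in the AMDGPU tests below, should yield an addrspacecast rather than an invalid bitcast.

#include <cassert>
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  IRBuilder<> B(Ctx);
  auto *F = Function::Create(FunctionType::get(B.getVoidTy(), false),
                             Function::ExternalLinkage, "f", M);
  B.SetInsertPoint(BasicBlock::Create(Ctx, "bb", F));

  // An alloca in addrspace(5), like [[A]] in the AMDGPU tests below.
  Value *Src = B.CreateAlloca(B.getFloatTy(), /*AddrSpace=*/5);
  Type *VecTy = FixedVectorType::get(B.getFloatTy(), 1);

  // The helper must emit an addrspacecast here; a plain CreateBitCast
  // would produce IR that the verifier rejects.
  Value *Cast = B.CreatePointerBitCastOrAddrSpaceCast(
      Src, PointerType::get(VecTy, /*AddrSpace=*/0));
  assert(isa<AddrSpaceCastInst>(Cast) && "expected an addrspacecast");
  B.CreateRetVoid();
  return 0;
}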

llvm/test/Transforms/VectorCombine/AMDGPU/as-transition-inseltpoison.ll

Lines changed: 1 addition & 3 deletions
@@ -11,9 +11,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n
 ; CHECK-LABEL: @load_from_other_as(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16

llvm/test/Transforms/VectorCombine/AMDGPU/as-transition.ll

Lines changed: 1 addition & 3 deletions
@@ -11,9 +11,7 @@ define protected amdgpu_kernel void @load_from_other_as(<4 x float>* nocapture n
 ; CHECK-LABEL: @load_from_other_as(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_HOGE:%.*]], align 4, addrspace(5)
-; CHECK-NEXT:    [[B:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to %struct.hoge*
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_HOGE]], %struct.hoge* [[B]], i64 0, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[C]] to <1 x float>*
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast [[STRUCT_HOGE]] addrspace(5)* [[A]] to <1 x float>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <1 x float>, <1 x float>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[E:%.*]] = shufflevector <1 x float> [[TMP1]], <1 x float> poison, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    store <4 x float> [[E]], <4 x float>* [[RESULTPTR:%.*]], align 16

llvm/test/Transforms/VectorCombine/X86/load-inseltpoison.ll

Lines changed: 17 additions & 0 deletions
@@ -253,6 +253,23 @@ define <4 x float> @gep00_load_f32_insert_v4f32_addrspace(<4 x float> addrspace(
   ret <4 x float> %r
 }
 
+; Should work with addrspace even when peeking past unsafe loads through geps
+
+define <4 x i32> @unsafe_load_i32_insert_v4i32_addrspace(i32* align 16 dereferenceable(16) %v3) {
+; CHECK-LABEL: @unsafe_load_i32_insert_v4i32_addrspace(
+; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast i32* [[V3:%.*]] to <4 x i32> addrspace(42)*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, <4 x i32> addrspace(42)* [[TMP1]], align 16
+; CHECK-NEXT:    [[INSELT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    ret <4 x i32> [[INSELT]]
+;
+  %t0 = getelementptr inbounds i32, i32* %v3, i32 1
+  %t1 = addrspacecast i32* %t0 to i32 addrspace(42)*
+  %t2 = getelementptr inbounds i32, i32 addrspace(42)* %t1, i64 1
+  %val = load i32, i32 addrspace(42)* %t2, align 4
+  %inselt = insertelement <4 x i32> poison, i32 %val, i32 0
+  ret <4 x i32> %inselt
+}
+
 ; If there are enough dereferenceable bytes, we can offset the vector load.
 
 define <8 x i16> @gep01_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceable(18) %p) nofree nosync {
