Skip to content

Commit 7dea145

Browse files
committed
[PreISelIntrinsicLowering] Support producing memset_pattern16 when loading from constant global
This is motivated by llvm#126736, and catches a case that would have resulted in memset_pattern16 being produced by LoopIdiomRecognize previously but is missed after moving to the intrinsic in llvm#126736 and relying on PreISelIntrinsicLowering to produce the libcall when available. The logic for handling load instructions that access constant globals could be made more extensive, but it's not clear it would be worthwhile. For now we prioritise the patterns that could be produced by LoopIdiomRecognize.
1 parent a313322 commit 7dea145

File tree

2 files changed

+29
-31
lines changed

2 files changed

+29
-31
lines changed

llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,10 +254,23 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
254254
if (!isLibFuncEmittable(M, &TLI, LibFunc_memset_pattern16))
255255
return nullptr;
256256

257+
// If V is a load instruction that loads from a constant global then attempt
258+
// to use that constant to produce the pattern.
259+
Constant *C = nullptr;
260+
if (auto *LI = dyn_cast<LoadInst>(V)) {
261+
if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) {
262+
if (GV->isConstant() && GV->hasInitializer()) {
263+
C = GV->getInitializer();
264+
}
265+
}
266+
}
267+
268+
if (!C)
269+
C = dyn_cast<Constant>(V);
270+
257271
// If the value isn't a constant, we can't promote it to being in a constant
258272
// array. We could theoretically do a store to an alloca or something, but
259273
// that doesn't seem worthwhile.
260-
Constant *C = dyn_cast<Constant>(V);
261274
if (!C || isa<ConstantExpr>(C))
262275
return nullptr;
263276

@@ -284,7 +297,7 @@ static Constant *getMemSetPattern16Value(MemSetPatternInst *Inst,
284297

285298
// Otherwise, we'll use an array of the constants.
286299
uint64_t ArraySize = 16 / Size;
287-
ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
300+
ArrayType *AT = ArrayType::get(C->getType(), ArraySize);
288301
return ConstantArray::get(AT, std::vector<Constant *>(ArraySize, C));
289302
}
290303

llvm/test/Transforms/PreISelIntrinsicLowering/X86/memset-pattern.ll

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,16 @@
44
;.
55
; CHECK: @G = global i32 5
66
; CHECK: @ptr_pat = private unnamed_addr constant ptr @G, align 8
7+
; CHECK: @nonconst_ptr_pat = private unnamed_addr global ptr @G, align 8
78
; CHECK: @.memset_pattern = private unnamed_addr constant [2 x i64] [i64 -6148895925951734307, i64 -6148895925951734307], align 16
89
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [2 x i64] [i64 4614256656552045848, i64 4614256656552045848], align 16
9-
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
10-
; CHECK: @.memset_pattern.3 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
11-
; CHECK: @.memset_pattern.4 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
10+
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
11+
; CHECK: @.memset_pattern.3 = private unnamed_addr constant [2 x ptr] [ptr @G, ptr @G], align 16
12+
; CHECK: @.memset_pattern.4 = private unnamed_addr constant [8 x i16] [i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555, i16 -21555], align 16
1213
; CHECK: @.memset_pattern.5 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
1314
; CHECK: @.memset_pattern.6 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
15+
; CHECK: @.memset_pattern.7 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
16+
; CHECK: @.memset_pattern.8 = private unnamed_addr constant i128 -113427455635030943652277463699152839203, align 16
1417
;.
1518
define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
1619
; CHECK-LABEL: define void @memset_pattern_i128_1_dynvalue(
@@ -33,7 +36,7 @@ define void @memset_pattern_i128_1_dynvalue(ptr %a, i128 %value) nounwind {
3336
define void @memset_pattern_i128_1(ptr %a, i128 %value) nounwind {
3437
; CHECK-LABEL: define void @memset_pattern_i128_1(
3538
; CHECK-SAME: ptr [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
36-
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 16)
39+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 16)
3740
; CHECK-NEXT: ret void
3841
;
3942
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -61,7 +64,7 @@ define void @memset_pattern_i128_1_nz_as(ptr addrspace(1) %a, i128 %value) nounw
6164
define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nounwind {
6265
; CHECK-LABEL: define void @memset_pattern_i128_1_align_attr(
6366
; CHECK-SAME: ptr align 16 [[A:%.*]], i128 [[VALUE:%.*]]) #[[ATTR0]] {
64-
; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.4, i64 16)
67+
; CHECK-NEXT: call void @memset_pattern16(ptr align 16 [[A]], ptr @.memset_pattern.6, i64 16)
6568
; CHECK-NEXT: ret void
6669
;
6770
tail call void @llvm.experimental.memset.pattern(ptr align(16) %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 1, i1 false)
@@ -71,7 +74,7 @@ define void @memset_pattern_i128_1_align_attr(ptr align(16) %a, i128 %value) nou
7174
define void @memset_pattern_i128_16(ptr %a) nounwind {
7275
; CHECK-LABEL: define void @memset_pattern_i128_16(
7376
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
74-
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.5, i64 256)
77+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.7, i64 256)
7578
; CHECK-NEXT: ret void
7679
;
7780
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 16, i1 false)
@@ -82,7 +85,7 @@ define void @memset_pattern_i128_x(ptr %a, i64 %x) nounwind {
8285
; CHECK-LABEL: define void @memset_pattern_i128_x(
8386
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
8487
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 16, [[X]]
85-
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.6, i64 [[TMP1]])
88+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.8, i64 [[TMP1]])
8689
; CHECK-NEXT: ret void
8790
;
8891
tail call void @llvm.experimental.memset.pattern(ptr %a, i128 u0xaaaaaaaabbbbbbbbccccccccdddddddd, i64 %x, i1 false)
@@ -112,7 +115,7 @@ define void @memset_pattern_i16_x(ptr %a, i64 %x) nounwind {
112115
; CHECK-LABEL: define void @memset_pattern_i16_x(
113116
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
114117
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 2, [[X]]
115-
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 [[TMP1]])
118+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.4, i64 [[TMP1]])
116119
; CHECK-NEXT: ret void
117120
;
118121
tail call void @llvm.experimental.memset.pattern(ptr %a, i16 u0xabcd, i64 %x, i1 false)
@@ -149,42 +152,24 @@ define void @memset_pattern_i64_128_tbaa(ptr %a) nounwind {
149152
@G = global i32 5
150153
@ptr_pat = private unnamed_addr constant ptr @G, align 8
151154

152-
; FIXME: memset_pattern16 should be selected.
153155
define void @memset_pattern_i64_16_fromptr(ptr %a) nounwind {
154156
; CHECK-LABEL: define void @memset_pattern_i64_16_fromptr(
155157
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
156158
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
157-
; CHECK-NEXT: br i1 false, label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
158-
; CHECK: [[LOADSTORELOOP]]:
159-
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP4:%.*]], %[[LOADSTORELOOP]] ]
160-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP2]]
161-
; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 1
162-
; CHECK-NEXT: [[TMP4]] = add i64 [[TMP2]], 1
163-
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 16
164-
; CHECK-NEXT: br i1 [[TMP5]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
165-
; CHECK: [[SPLIT]]:
159+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.2, i64 128)
166160
; CHECK-NEXT: ret void
167161
;
168162
%1 = load i64, ptr @ptr_pat, align 8
169163
tail call void @llvm.experimental.memset.pattern(ptr %a, i64 %1, i64 16, i1 false)
170164
ret void
171165
}
172166

173-
; FIXME: memset_pattern16 should be selected.
174167
define void @memset_pattern_i64_x_fromptr(ptr %a, i64 %x) nounwind {
175168
; CHECK-LABEL: define void @memset_pattern_i64_x_fromptr(
176169
; CHECK-SAME: ptr [[A:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
177170
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @ptr_pat, align 8
178-
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 0, [[X]]
179-
; CHECK-NEXT: br i1 [[TMP2]], label %[[SPLIT:.*]], label %[[LOADSTORELOOP:.*]]
180-
; CHECK: [[LOADSTORELOOP]]:
181-
; CHECK-NEXT: [[TMP3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP5:%.*]], %[[LOADSTORELOOP]] ]
182-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP3]]
183-
; CHECK-NEXT: store i64 [[TMP1]], ptr [[TMP4]], align 1
184-
; CHECK-NEXT: [[TMP5]] = add i64 [[TMP3]], 1
185-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP5]], [[X]]
186-
; CHECK-NEXT: br i1 [[TMP6]], label %[[LOADSTORELOOP]], label %[[SPLIT]]
187-
; CHECK: [[SPLIT]]:
171+
; CHECK-NEXT: [[TMP2:%.*]] = mul i64 8, [[X]]
172+
; CHECK-NEXT: call void @memset_pattern16(ptr [[A]], ptr @.memset_pattern.3, i64 [[TMP2]])
188173
; CHECK-NEXT: ret void
189174
;
190175
%1 = load i64, ptr @ptr_pat, align 8

0 commit comments

Comments
 (0)