Skip to content

Commit 6157538

Browse files
[InstCombine] ptrmask of gep for dynamic pointer alignment (#80002)
Targets the dynamic realignment pattern `(Ptr + Align - 1) & -Align`, as implemented by a gep followed by a ptrmask. Specifically, when the pointer already has alignment information, dynamically realigning it to an alignment no greater than what is already known should be a no-op. Discovered while writing test cases for another patch. For the low bits of a pointer that are known to be zero from its alignment, adding gep index bits in those positions and then clearing them with the mask is a no-op, so those bits can be dropped from the index. Folding the ptrmask effect entirely into the gep is the ideal result, as that unblocks other optimisations that are not aware of ptrmask. In some other cases the gep is known to be dead and is removed without changing the ptrmask. In the least effective case, this transform creates a new gep with a rounded-down index and still leaves the ptrmask unchanged. That simplified gep is still a minor improvement: geps are cheap and ptrmask occurs in address calculation contexts, so I don't think it's worth special-casing to avoid the extra instruction.
1 parent 3714f93 commit 6157538

File tree

2 files changed

+198
-4
lines changed

2 files changed

+198
-4
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,44 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
997997
I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
998998
return I;
999999

1000+
// Combine:
1001+
// (ptrmask (getelementptr i8, ptr p, imm i), imm mask)
1002+
// -> (ptrmask (getelementptr i8, ptr p, imm (i & mask)), imm mask)
1003+
// where only the low bits known to be zero in the pointer are changed
1004+
Value *InnerPtr;
1005+
uint64_t GEPIndex;
1006+
uint64_t PtrMaskImmediate;
1007+
if (match(I, m_Intrinsic<Intrinsic::ptrmask>(
1008+
m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)),
1009+
m_ConstantInt(PtrMaskImmediate)))) {
1010+
1011+
LHSKnown = computeKnownBits(InnerPtr, Depth + 1, I);
1012+
if (!LHSKnown.isZero()) {
1013+
const unsigned trailingZeros = LHSKnown.countMinTrailingZeros();
1014+
uint64_t PointerAlignBits = (uint64_t(1) << trailingZeros) - 1;
1015+
1016+
uint64_t HighBitsGEPIndex = GEPIndex & ~PointerAlignBits;
1017+
uint64_t MaskedLowBitsGEPIndex =
1018+
GEPIndex & PointerAlignBits & PtrMaskImmediate;
1019+
1020+
uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
1021+
1022+
if (MaskedGEPIndex != GEPIndex) {
1023+
auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0));
1024+
Builder.SetInsertPoint(I);
1025+
Type *GEPIndexType =
1026+
DL.getIndexType(GEP->getPointerOperand()->getType());
1027+
Value *MaskedGEP = Builder.CreateGEP(
1028+
GEP->getSourceElementType(), InnerPtr,
1029+
ConstantInt::get(GEPIndexType, MaskedGEPIndex),
1030+
GEP->getName(), GEP->isInBounds());
1031+
1032+
replaceOperand(*I, 0, MaskedGEP);
1033+
return I;
1034+
}
1035+
}
1036+
}
1037+
10001038
break;
10011039
}
10021040

llvm/test/Transforms/InstCombine/ptrmask.ll

Lines changed: 160 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,12 @@ define ptr addrspace(1) @ptrmask_combine_consecutive_preserve_attrs_todo2(ptr ad
8080
define ptr @ptrmask_combine_add_nonnull(ptr %p) {
8181
; CHECK-LABEL: define ptr @ptrmask_combine_add_nonnull
8282
; CHECK-SAME: (ptr [[P:%.*]]) {
83-
; CHECK-NEXT: [[PM0:%.*]] = call align 64 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -64)
84-
; CHECK-NEXT: [[PGEP:%.*]] = getelementptr i8, ptr [[PM0]], i64 33
85-
; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP]], i64 -32)
83+
; CHECK-NEXT: [[PM0:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -60)
84+
; CHECK-NEXT: [[PGEP1:%.*]] = getelementptr i8, ptr [[PM0]], i64 32
85+
; CHECK-NEXT: [[R:%.*]] = call nonnull align 32 ptr @llvm.ptrmask.p0.i64(ptr [[PGEP1]], i64 -32)
8686
; CHECK-NEXT: ret ptr [[R]]
8787
;
88-
%pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -64)
88+
%pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -60)
8989
%pgep = getelementptr i8, ptr %pm0, i64 33
9090
%r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16)
9191
ret ptr %r
@@ -287,6 +287,162 @@ define ptr addrspace(1) @ptrmask_maintain_provenance_i32(ptr addrspace(1) %p0) {
287287
ret ptr addrspace(1) %r
288288
}
289289

290+
define ptr @ptrmask_is_nop0(ptr align 8 %p) {
291+
; CHECK-LABEL: define ptr @ptrmask_is_nop0
292+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
293+
; CHECK-NEXT: ret ptr [[P]]
294+
;
295+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8)
296+
ret ptr %pm
297+
}
298+
299+
define ptr @ptrmask_is_nop1(ptr align 8 %p) {
300+
; CHECK-LABEL: define ptr @ptrmask_is_nop1
301+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
302+
; CHECK-NEXT: ret ptr [[P]]
303+
;
304+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -4)
305+
ret ptr %pm
306+
}
307+
308+
define ptr @ptrmask_to_modified_gep0(ptr align 8 %p) {
309+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep0
310+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
311+
; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16)
312+
; CHECK-NEXT: ret ptr [[PM]]
313+
;
314+
%gep = getelementptr i8, ptr %p, i32 5
315+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16)
316+
ret ptr %pm
317+
}
318+
319+
define ptr @ptrmask_to_modified_gep1(ptr align 8 %p) {
320+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep1
321+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
322+
; CHECK-NEXT: ret ptr [[P]]
323+
;
324+
%gep = getelementptr i8, ptr %p, i32 6
325+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8)
326+
ret ptr %pm
327+
}
328+
329+
define ptr @ptrmask_to_modified_gep2(ptr align 16 %p) {
330+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep2
331+
; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
332+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 12
333+
; CHECK-NEXT: ret ptr [[GEP1]]
334+
;
335+
%gep = getelementptr i8, ptr %p, i32 15
336+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
337+
ret ptr %pm
338+
}
339+
340+
define ptr @ptrmask_to_modified_gep4(ptr align 8 %p) {
341+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep4
342+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
343+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24
344+
; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16)
345+
; CHECK-NEXT: ret ptr [[PM]]
346+
;
347+
%gep = getelementptr i8, ptr %p, i32 29
348+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16)
349+
ret ptr %pm
350+
}
351+
352+
define ptr @ptrmask_to_modified_gep5(ptr align 8 %p) {
353+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep5
354+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
355+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 24
356+
; CHECK-NEXT: ret ptr [[GEP1]]
357+
;
358+
%gep = getelementptr i8, ptr %p, i32 30
359+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -8)
360+
ret ptr %pm
361+
}
362+
363+
define ptr @ptrmask_to_modified_gep6(ptr align 16 %p) {
364+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep6
365+
; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
366+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 28
367+
; CHECK-NEXT: ret ptr [[GEP1]]
368+
;
369+
%gep = getelementptr i8, ptr %p, i32 31
370+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
371+
ret ptr %pm
372+
}
373+
374+
define ptr @ptrmask_to_modified_gep_indirect0(ptr align 16 %p) {
375+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect0
376+
; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
377+
; 44 from 4*sizeof(i32) + (31 & -4)
378+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 44
379+
; CHECK-NEXT: ret ptr [[GEP1]]
380+
;
381+
%gep0 = getelementptr i32, ptr %p, i32 4
382+
%gep1 = getelementptr i8, ptr %gep0, i32 31
383+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep1, i64 -4)
384+
ret ptr %pm
385+
}
386+
387+
define ptr @ptrmask_to_modified_gep_indirect1(ptr %p) {
388+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_indirect1
389+
; CHECK-SAME: (ptr [[P:%.*]]) {
390+
391+
; CHECK-NEXT: [[R:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 -16)
392+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[R]], i64 32
393+
; CHECK-NEXT: ret ptr [[GEP]]
394+
;
395+
%pm0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
396+
%pgep = getelementptr i8, ptr %pm0, i64 33
397+
%r = call ptr @llvm.ptrmask.p0.i64(ptr %pgep, i64 -16)
398+
ret ptr %r
399+
}
400+
401+
define ptr @ptrmask_to_modified_gep_zero_argument() {
402+
; CHECK-LABEL: define ptr @ptrmask_to_modified_gep_zero_argument() {
403+
; CHECK-NEXT: [[P:%.*]] = call nonnull align 4 ptr @llvm.ptrmask.p0.i64(ptr nonnull inttoptr (i64 31 to ptr), i64 28)
404+
; CHECK-NEXT: ret ptr [[P]]
405+
;
406+
%gep = getelementptr inbounds i8, ptr null, i32 31
407+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
408+
ret ptr %pm
409+
}
410+
411+
define ptr @ptrmask_to_preserves_inbounds(ptr align 16 %p) {
412+
; CHECK-LABEL: define ptr @ptrmask_to_preserves_inbounds
413+
; CHECK-SAME: (ptr align 16 [[P:%.*]]) {
414+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 28
415+
; CHECK-NEXT: ret ptr [[GEP1]]
416+
;
417+
%gep = getelementptr inbounds i8, ptr %p, i32 31
418+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -4)
419+
ret ptr %pm
420+
}
421+
422+
define ptr @ptrmask_of_gep_requires_i8(ptr align 8 %p) {
423+
; CHECK-LABEL: define ptr @ptrmask_of_gep_requires_i8
424+
; CHECK-SAME: (ptr align 8 [[P:%.*]]) {
425+
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, ptr [[P]], i64 8
426+
; CHECK-NEXT: [[PM:%.*]] = call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[GEP1]], i64 -16)
427+
; CHECK-NEXT: ret ptr [[PM]]
428+
;
429+
%gep = getelementptr i16, ptr %p, i32 5
430+
%pm = call ptr @llvm.ptrmask.p0.i64(ptr %gep, i64 -16)
431+
ret ptr %pm
432+
}
433+
434+
define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented(<2 x ptr> align 8 %p) {
435+
; CHECK-LABEL: define <2 x ptr> @ptrmask_of_gep_vector_type_unimplemented
436+
; CHECK-SAME: (<2 x ptr> align 8 [[P:%.*]]) {
437+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, <2 x ptr> [[P]], i64 17
438+
; CHECK-NEXT: [[PM:%.*]] = call align 32 <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[GEP]], <2 x i64> <i64 -96, i64 -96>)
439+
; CHECK-NEXT: ret <2 x ptr> [[PM]]
440+
;
441+
%gep = getelementptr i8, <2 x ptr> %p, i32 17
442+
%pm = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %gep, <2 x i64> <i64 -96, i64 -96>)
443+
ret <2 x ptr> %pm
444+
}
445+
290446
define ptr @ptrmask_is_useless0(i64 %i, i64 %m) {
291447
; CHECK-LABEL: define ptr @ptrmask_is_useless0
292448
; CHECK-SAME: (i64 [[I:%.*]], i64 [[M:%.*]]) {

0 commit comments

Comments
 (0)