[InstCombine] Add combines/simplifications for llvm.ptrmask
#67166
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-analysis

Changes
Patch is 31.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67166.diff

11 Files Affected:
diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc33..3f1f4443436da15 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
// CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
// CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
// CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
- // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+ // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
// CHECK: store ptr [[NEXT]], ptr %mylist
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d8aa614cae53b10..1049845d97844a7 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6397,6 +6397,48 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Constant::getNullValue(ReturnType);
break;
}
+ case Intrinsic::ptrmask: {
+ // Fail loudly in case this is ever changed.
+ // TODO: If vector types are supported the logic that checks if the mask is
+ // useless should be updated to use generic constants.
+ assert(!Op0->getType()->isVectorTy() && !Op1->getType()->isVectorTy() &&
+ "These simplifications where written at a time when ptrmask did not "
+ "support vector types and may not work for vectors");
+
+ // NOTE: We can't apply these simplifications based on the value of Op1
+ // because we need to preserve provenance.
+ if (isa<PoisonValue>(Op0))
+ return Op0;
+
+ if (Q.isUndefValue(Op0))
+ return Constant::getNullValue(Op0->getType());
+
+ if (match(Op0, m_Zero()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op1->getType()->getScalarSizeInBits() ==
+ Q.DL.getPointerTypeSizeInBits(Op0->getType())) {
+ if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+ return Op0;
+
+ // TODO: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ KnownBits PtrKnown =
+ computeKnownBits(Op0, Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
+ // See if we are only masking off bits we know are already zero due to
+ // alignment.
+ if ((*C | PtrKnown.Zero).isAllOnes())
+ return Op0;
+ }
+ }
+ break;
+ }
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c6100f24b0507de..e0a8b30b34a8ff0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1964,18 +1964,78 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::ptrmask: {
+ KnownBits Known(DL.getPointerTypeSizeInBits(II->getType()));
+ if (SimplifyDemandedInstructionBits(*II, Known))
+ return II;
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ // Fail loudly in case this is ever changed.
+ // TODO: If vector types are supported the merging of (ptrmask (ptrmask))
+ // needs to ensure we don't merge a vector type with a non-vector type.
+ assert(!Op0->getType()->isVectorTy() && !Op1->getType()->isVectorTy() &&
+ "These combines where written at a time when ptrmask did not "
+ "support vector types and may not work for vectors");
+
Value *InnerPtr, *InnerMask;
- if (match(II->getArgOperand(0),
- m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
- m_Value(InnerMask))))) {
- if (II->getArgOperand(1)->getType() == InnerMask->getType()) {
- Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
- return replaceInstUsesWith(
- *II,
- Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
- {InnerPtr, NewMask}));
+ bool Changed = false;
+ // Combine:
+ // (ptrmask (ptrmask p, A), B)
+ // -> (ptrmask p, (and A, B))
+ if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(
+ m_Value(InnerPtr), m_Value(InnerMask))))) {
+ // See if combining the two masks is free.
+ bool OkayToMerge = InnerMask->getType() == Op1->getType();
+ bool NeedsNew = false;
+ if (!OkayToMerge) {
+ if (match(InnerMask, m_ImmConstant())) {
+ InnerMask = Builder.CreateZExtOrTrunc(InnerMask, Op1->getType());
+ OkayToMerge = true;
+ } else if (match(Op1, m_ImmConstant())) {
+ Op1 = Builder.CreateZExtOrTrunc(Op1, InnerMask->getType());
+ OkayToMerge = true;
+ // Need to create a new one here, as the intrinsic id needs to change.
+ NeedsNew = true;
+ }
+ }
+ if (InnerMask->getType() == Op1->getType()) {
+ // TODO: If InnerMask == Op1, we could copy attributes from inner
+ // callsite -> outer callsite.
+ Value *NewMask = Builder.CreateAnd(Op1, InnerMask);
+ if (NeedsNew)
+ return replaceInstUsesWith(
+ *II,
+ Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
+ {InnerPtr, NewMask}));
+
+ replaceOperand(CI, 0, InnerPtr);
+ replaceOperand(CI, 1, NewMask);
+ Changed = true;
}
}
+
+ // See if we can deduce non-null.
+ if (!CI.hasRetAttr(Attribute::NonNull) &&
+ (Known.isNonZero() ||
+ isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+ CI.addRetAttr(Attribute::NonNull);
+ Changed = true;
+ }
+
+ // Known bits will capture if we had alignment information associated with
+ // the pointer argument.
+ if (Known.countMinTrailingZeros() > Log2(CI.getRetAlign().valueOrOne())) {
+ if (CI.hasRetAttr(Attribute::Alignment))
+ CI.removeRetAttr(Attribute::Alignment);
+ CI.addRetAttr(Attribute::get(
+ CI.getContext(), Attribute::Alignment,
+ uint64_t(1) << (Known.isZero() ? (Known.getBitWidth() - 1)
+ : Known.countMinTrailingZeros())));
+
+ Changed = true;
+ }
+ if (Changed)
+ return &CI;
break;
}
case Intrinsic::uadd_with_overflow:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8ff61ab36da307b..508609670b9ac88 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1955,6 +1955,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
+ // (ptrtoint (ptrmask P, M))
+ // -> (and (ptrtoint P), M)
+ // This is generally beneficial as `and` is better supported than `ptrmask`.
+ Value *Ptr, *Mask;
+ if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+ m_Value(Mask)))) &&
+ Mask->getType() == Ty)
+ return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
+
if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
// Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
// While this can increase the number of instructions it doesn't actually
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 40c24d87bfec508..0b03aa48fa0c0cc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -526,6 +526,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// Tries to simplify operands to an integer instruction based on its
/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
+ bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index be005e61a8d2d89..5a5334b1347a95d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -48,15 +48,19 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return true;
}
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ return DL.getPointerTypeSizeInBits(Ty);
+}
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
-bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- APInt DemandedMask(APInt::getAllOnes(BitWidth));
-
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known) {
+ APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
if (!V) return false;
@@ -65,6 +69,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
return true;
}
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ KnownBits Known(getBitWidth(Inst.getType(), DL));
+ return SimplifyDemandedInstructionBits(Inst, Known);
+}
+
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
@@ -898,6 +909,53 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
+ case Intrinsic::ptrmask: {
+ // Fail loudly in case this is ever changed.
+ // Likely not much needs to be changed here to support vector types.
+ assert(!I->getOperand(0)->getType()->isVectorTy() &&
+ !I->getOperand(1)->getType()->isVectorTy() &&
+ "These simplifications where written at a time when ptrmask did "
+ "not support vector types and may not work for vectors");
+
+ unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
+ RHSKnown = KnownBits(MaskWidth);
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
+ RHSKnown, Depth + 1))
+ return I;
+
+ RHSKnown = RHSKnown.zextOrTrunc(BitWidth);
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
+
+ Known = LHSKnown & RHSKnown;
+ KnownBitsComputed = DemandedMask.isAllOnes();
+
+ // If the client is only demanding bits we know to be zero, return
+ // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
+ // provenance, but making the mask zero will be easily optimizable in
+ // the backend.
+ if (DemandedMask.isSubsetOf(Known.Zero))
+ return replaceOperand(
+ *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
+
+ // Mask in demanded space does nothing.
+ // TODO: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return I->getOperand(0);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
+ return I;
+
+ break;
+ }
+
case Intrinsic::fshr:
case Intrinsic::fshl: {
const APInt *SA;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index c9db0656c6b7dd4..561a5ff35ba1082 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -320,8 +320,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(ptr addrspace(
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(
-; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -1)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR:%.*]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -333,7 +332,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(
; Make sure non-constant masks can also be handled.
define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr, ptr addrspace(1) %mask.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask(
-; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range !0
+; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 [[TMP1]])
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index 23f620310d7c26c..f3b33f013a9b5d4 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -134,7 +134,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask)
define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -146,7 +146,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask)
; Increase load align from 1 to 2
define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align2_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -158,7 +158,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; Increase load align from 1 to 4
define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align4_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -170,7 +170,7 @@ define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; Increase load align from 1 to 8
define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -183,8 +183,7 @@ define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; TODO: Should be able to drop the ptrmask
define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align8(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
%aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
@@ -196,8 +195,7 @@ define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; TODO: Should be able to drop the ptrmask
define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align16(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
%aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
@@ -209,7 +207,7 @@ define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; than the pointer size.
define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1_smallmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -8)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -222,7 +220,7 @@ define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) {
; than the pointer size.
define <16 x i8> @ptrmask_align8_ptr_align1_bigmask(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1_bigmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i128(ptr [[PTR:%.*]], i128 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i128(ptr [[PTR:%.*]], i128 18446744073709551608)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index 904c758b99306f4..0723fe2fd4239c4 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -65,3 +65,40 @@ define ptr @fold_2x_fail_type_mismatch2(ptr %p, i64 %m0, i32 %m1) {
%p1 = call ptr @llvm.ptrmask.p0.i32(ptr %p0, i32 %m1)
ret ptr %p1
}
+
+
+define ptr @fold_2x_type_mismatch_const0(ptr %p, i32 %m1) {
+; CHECK-LABEL: define ptr @fold_2x_type_mismatch_const0
+; CHECK-SAME: (ptr [[P:%.*]], i32 [[M1:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[M1]], -128
+; CHECK-NEXT: ...
[truncated]
@nikic, think fixed all issues with
You can test this locally with the following command:

git-clang-format --diff 09f1aaca0bdc260c29043188aa2f63bcbe07f02f 2efd23c399922eb8839bdf79915a0c2ec0ec04a5 -- clang/test/CodeGen/arm64_32-vaarg.c llvm/lib/Analysis/InstructionSimplify.cpp llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp llvm/lib/Transforms/InstCombine/InstCombineInternal.h llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

View the diff from clang-format here:

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index fa6fe9d30abd..07beef0309df 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1034,7 +1034,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// constant. We can't directly simplify pointers as a constant because of
// pointer provenance.
// TODO: We could return `(inttoptr const)` for pointers.
- if (!V->getType()->isPointerTy() && DemandedMask.isSubsetOf(Known.Zero | Known.One))
+ if (!V->getType()->isPointerTy() &&
+ DemandedMask.isSubsetOf(Known.Zero | Known.One))
return Constant::getIntegerValue(VTy, Known.One);
return nullptr;
}
llvm.ptrmask is currently limited to pointers only, and does not accept vectors of pointers. This is an unnecessary limitation, especially as the underlying instructions (getelementptr etc) do support vectors of pointers. We should relax this sooner rather than later, to avoid introducing code that assumes non-vectors (llvm#67166).
Okay, rebased on top of your vec support patches + added vec support/tests.
ping.
As in disallow say
Yes, exactly. I've started working on a patch at #69343, but it's not quite done yet.
Okay, I'll wait on the LangRef update to get this in.
Rebased on top of nikic's patch.
All my comments have been resolved, thank you very much!
The InstCombine/ptrmask.ll test is failing in CI.
Differential Revision: https://reviews.llvm.org/D156632
Bah, forgot to rebase the InstCombine ptrmask tests on top of the simplification fix.
LGTM!
KnownBits PtrKnown = computeKnownBits(Op0, /*Depth=*/0, Q);
// See if we only masking off bits we know are already zero due to
// alignment.
APInt IrrelivantPtrBits =
Irrelevant
// constant.
if (DemandedMask.isSubsetOf(Known.Zero|Known.One))
// constant. We can't directly simplify pointers as a constant because of
// pointer provanance.
provenance
Mostly the same as `and`. We also have a check for a useless `llvm.ptrmask` if the ptr is already known aligned. Differential Revision: https://reviews.llvm.org/D156633
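For illustration, a minimal IR sketch of that alignment check (a hypothetical function, not one of the patch's tests): %p is already `align 8`, so the mask -8 only clears bits that are known to be zero and the call simplifies to %p.

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define ptr @already_aligned(ptr align 8 %p) {
  ; -8 clears only the low three bits, which the `align 8` parameter attribute
  ; already guarantees are zero, so the call folds to %p.
  %masked = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8)
  ret ptr %masked
}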
Differential Revision: https://reviews.llvm.org/D156634
Logic basically copies `and`, but we can't return a constant if the result == the RHS (mask), so that case is skipped.
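A hedged sketch of what the demanded-bits reasoning allows (hypothetical example; the same end result can also be reached through the ptrtoint fold later in this series): the `and` below only demands bits 8 and up, and the mask -16 keeps all of those bits, so the `ptrmask` contributes nothing in the demanded space.

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define i64 @only_high_bits(ptr %p) {
  ; Only bits 8..63 of %addr are demanded by the final `and`; mask -16 keeps
  ; all of them, so the whole sequence can become `and (ptrtoint %p), -256`.
  %masked = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  %addr = ptrtoint ptr %masked to i64
  %hi = and i64 %addr, -256
  ret i64 %hi
}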
…m.ptrmask` We can deduce the former based on the mask / incoming pointer alignment. We can set the latter if we know the result is non-zero (this is essentially just caching our analysis result). Differential Revision: https://reviews.llvm.org/D156636
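A rough sketch of the `align` deduction (hypothetical function, assuming the default 64-bit pointer layout; it mirrors the `call align N ptr @llvm.ptrmask...` lines in the updated tests above):

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define ptr @align_down_16(ptr %p) {
  ; The mask clears the low four bits of the result, so with this patch the
  ; call site becomes:
  ;   %aligned = call align 16 ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  ; `nonnull` is only added when the result can additionally be proven non-zero.
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  ret ptr %aligned
}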
If we have associated attributes, i.e. `([ret_attrs] (ptrmask (ptrmask p0, m0), m1))`, we should preserve `[ret_attrs]` when combining the two `llvm.ptrmask`s. Differential Revision: https://reviews.llvm.org/D156638
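An illustrative sketch of the attribute-preserving merge (hypothetical function and attribute choice): the inner and outer `llvm.ptrmask` collapse into one call whose mask is the `and` of the two masks, and the outer call's return attributes are kept.

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define ptr @merge_keeps_attrs(ptr %p, i64 %m0, i64 %m1) {
  ; Roughly becomes:
  ;   %mask  = and i64 %m1, %m0
  ;   %outer = call noundef ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %mask)
  ; i.e. the `noundef` on the outer call survives the combine.
  %inner = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m0)
  %outer = call noundef ptr @llvm.ptrmask.p0.i64(ptr %inner, i64 %m1)
  ret ptr %outer
}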
…0), m0)` `and` is generally better supported, so if we have a `ptrmask` anyway we might as well use `and`. Differential Revision: https://reviews.llvm.org/D156640
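A small sketch of that fold (hypothetical function): when the `ptrmask` has a single use and the mask type matches the `ptrtoint` result type, the pair is rewritten into integer arithmetic.

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define i64 @addr_of_masked(ptr %p, i64 %m) {
  ; With this fold the two instructions become:
  ;   %addr0 = ptrtoint ptr %p to i64
  ;   %addr  = and i64 %addr0, %m
  %masked = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
  %addr = ptrtoint ptr %masked to i64
  ret i64 %addr
}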
Since PRs llvm#69343 and llvm#67166 we probably have enough support for `llvm.ptrmask` to make it preferable to the GEP strategy.
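To make the comparison concrete, here is one plausible reading of the two strategies for aligning a pointer down to 16 bytes (an illustrative sketch only; the referencing commit may emit slightly different IR).

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

; With llvm.ptrmask: a single intrinsic call that keeps pointer provenance.
define ptr @align_down_ptrmask(ptr %p) {
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  ret ptr %aligned
}

; The GEP strategy: compute how far %p is past the previous 16-byte boundary
; as an integer, then step back with a negative getelementptr offset.
define ptr @align_down_gep(ptr %p) {
  %addr = ptrtoint ptr %p to i64
  %low  = and i64 %addr, 15
  %neg  = sub i64 0, %low
  %aligned = getelementptr i8, ptr %p, i64 %neg
  ret ptr %aligned
}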
Commits in this PR:
- Tests for `llvm.ptrmask`; NFC
- Simplifications for `llvm.ptrmask`
- More tests for `llvm.ptrmask`; NFC
- Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`
- `SimplifyDemandedBits` for `llvm.ptrmask`
- Preserve return attributes when merging `llvm.ptrmask`
- Merge consecutive `llvm.ptrmask` with different mask types if a mask is constant.
- Fold `(ptrtoint (ptrmask p0, m0))` -> `(and (ptrtoint p0), m0)`