Skip to content

[SeparateConstOffsetFromGEP] Handle or disjoint flags #76997

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 14 additions & 20 deletions llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
Expand Down Expand Up @@ -235,18 +236,16 @@ class ConstantOffsetExtractor {
/// \p UserChainTail Outputs the tail of UserChain so that we can
/// garbage-collect unused instructions in UserChain.
static Value *Extract(Value *Idx, GetElementPtrInst *GEP,
User *&UserChainTail, const DominatorTree *DT);
User *&UserChainTail);

/// Looks for a constant offset from the given GEP index without extracting
/// it. It returns the numeric value of the extracted constant offset (0 if
/// failed). The meaning of the arguments are the same as Extract.
static int64_t Find(Value *Idx, GetElementPtrInst *GEP,
const DominatorTree *DT);
static int64_t Find(Value *Idx, GetElementPtrInst *GEP);

private:
ConstantOffsetExtractor(Instruction *InsertionPt, const DominatorTree *DT)
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()), DT(DT) {
}
ConstantOffsetExtractor(Instruction *InsertionPt)
: IP(InsertionPt), DL(InsertionPt->getModule()->getDataLayout()) {}

/// Searches the expression that computes V for a non-zero constant C s.t.
/// V can be reassociated into the form V' + C. If the searching is
Expand Down Expand Up @@ -336,7 +335,6 @@ class ConstantOffsetExtractor {
Instruction *IP;

const DataLayout &DL;
const DominatorTree *DT;
};

/// A pass that tries to split every GEP in the function into a variadic
Expand Down Expand Up @@ -519,12 +517,10 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
}

Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
// Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
// don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
// FIXME: this does not appear to be covered by any tests
// (with x86/aarch64 backends at least)
// Do not trace into "or" unless it is equivalent to "add".
// This is the case if the or's disjoint flag is set.
if (BO->getOpcode() == Instruction::Or &&
!haveNoCommonBitsSet(LHS, RHS, SimplifyQuery(DL, DT, /*AC*/ nullptr, BO)))
!cast<PossiblyDisjointInst>(BO)->isDisjoint())
return false;

// FIXME: We don't currently support constants from the RHS of subs,
Expand Down Expand Up @@ -778,9 +774,8 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
}

Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
User *&UserChainTail,
const DominatorTree *DT) {
ConstantOffsetExtractor Extractor(GEP, DT);
User *&UserChainTail) {
ConstantOffsetExtractor Extractor(GEP);
// Find a non-zero constant offset first.
APInt ConstantOffset =
Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
Expand All @@ -795,10 +790,9 @@ Value *ConstantOffsetExtractor::Extract(Value *Idx, GetElementPtrInst *GEP,
return IdxWithoutConstOffset;
}

int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
const DominatorTree *DT) {
int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP) {
// If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
return ConstantOffsetExtractor(GEP, DT)
return ConstantOffsetExtractor(GEP)
.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
GEP->isInBounds())
.getSExtValue();
Expand Down Expand Up @@ -836,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,

// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP);
if (ConstantOffset != 0) {
NeedsExtraction = true;
// A GEP may have multiple indices. We accumulate the extracted
Expand Down Expand Up @@ -1026,7 +1020,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Value *OldIdx = GEP->getOperand(I);
User *UserChainTail;
Value *NewIdx =
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail, DT);
ConstantOffsetExtractor::Extract(OldIdx, GEP, UserChainTail);
if (NewIdx != nullptr) {
// Switches to the index with the constant offset removed.
GEP->setOperand(I, NewIdx);
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspac
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
Expand All @@ -55,17 +55,17 @@ define amdgpu_cs void @test1_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2, i32, p
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
Expand All @@ -90,17 +90,17 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) {
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)
Expand All @@ -125,17 +125,17 @@ define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
%ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

%bs2 = or i32 %bs1, 1
%bs2 = or disjoint i32 %bs1, 1
%ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
%ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

%bs3 = or i32 %bs1, 2
%bs3 = or disjoint i32 %bs1, 2
%ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
%ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

%bs4 = or i32 %bs1, 3
%bs4 = or disjoint i32 %bs1, 3
%ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
%ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ main_body:
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28, !amdgpu.uniform !0
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
Expand Down Expand Up @@ -270,7 +270,7 @@ main_body:
%25 = getelementptr inbounds [0 x <8 x i32>], ptr addrspace(6) %1, i32 0, i32 %24
%26 = load <8 x i32>, ptr addrspace(6) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr inbounds [0 x <4 x i32>], ptr addrspace(6) %1, i32 0, i32 %28
%30 = load <4 x i32>, ptr addrspace(6) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float 0.0, <8 x i32> %26, <4 x i32> %30, i1 0, i32 0, i32 0) #8
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1157,38 +1157,38 @@ define <8 x i16> @large_vector(ptr addrspace(3) %p, i32 %idxp) {
; GFX11-NEXT: s_setpc_b64 s[30:31]
%idx = shl i32 %idxp, 4

%i.0 = or i32 %idx, 0
%i.0 = or disjoint i32 %idx, 0
%p.0 = getelementptr half, ptr addrspace(3) %p, i32 %i.0
%x.0 = load i16, ptr addrspace(3) %p.0, align 4
%v0p = insertelement <8 x i16> poison, i16 %x.0, i32 0
%i.1 = or i32 %idx, 1
%i.1 = or disjoint i32 %idx, 1
%p.1 = getelementptr half, ptr addrspace(3) %p, i32 %i.1
%x.1 = load i16, ptr addrspace(3) %p.1, align 2
%v0 = insertelement <8 x i16> %v0p, i16 %x.1, i32 1

%i.2 = or i32 %idx, 2
%i.2 = or disjoint i32 %idx, 2
%p.2 = getelementptr half, ptr addrspace(3) %p, i32 %i.2
%x.2 = load i16, ptr addrspace(3) %p.2, align 4
%v1p = insertelement <8 x i16> poison, i16 %x.2, i32 0
%i.3 = or i32 %idx, 3
%i.3 = or disjoint i32 %idx, 3
%p.3 = getelementptr half, ptr addrspace(3) %p, i32 %i.3
%x.3 = load i16, ptr addrspace(3) %p.3, align 2
%v1 = insertelement <8 x i16> %v1p, i16 %x.3, i32 1

%i.4 = or i32 %idx, 4
%i.4 = or disjoint i32 %idx, 4
%p.4 = getelementptr half, ptr addrspace(3) %p, i32 %i.4
%x.4 = load i16, ptr addrspace(3) %p.4, align 4
%v2p = insertelement <8 x i16> poison, i16 %x.4, i32 0
%i.5 = or i32 %idx, 5
%i.5 = or disjoint i32 %idx, 5
%p.5 = getelementptr half, ptr addrspace(3) %p, i32 %i.5
%x.5 = load i16, ptr addrspace(3) %p.5, align 2
%v2 = insertelement <8 x i16> %v2p, i16 %x.5, i32 1

%i.6 = or i32 %idx, 6
%i.6 = or disjoint i32 %idx, 6
%p.6 = getelementptr half, ptr addrspace(3) %p, i32 %i.6
%x.6 = load i16, ptr addrspace(3) %p.6, align 4
%v3p = insertelement <8 x i16> poison, i16 %x.6, i32 0
%i.7 = or i32 %idx, 7
%i.7 = or disjoint i32 %idx, 7
%p.7 = getelementptr half, ptr addrspace(3) %p, i32 %i.7
%x.7 = load i16, ptr addrspace(3) %p.7, align 2
%v3 = insertelement <8 x i16> %v3p, i16 %x.7, i32 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -732,7 +732,7 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no
%192 = and i64 %191, 4294967168
%193 = getelementptr inbounds i8, ptr addrspace(1) %1, i64 %192
%194 = shl nuw nsw i32 %178, 5
%195 = or i32 %194, 8
%195 = or disjoint i32 %194, 8
%196 = zext i32 %195 to i64
%197 = getelementptr inbounds i8, ptr addrspace(1) %193, i64 %196
%198 = getelementptr inbounds i8, ptr addrspace(1) %197, i64 -4
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/vector-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,11 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
%t3 = shl nuw nsw i32 %t1, 9
%ttile_origin.2 = and i32 %t3, 130560
%tstart_offset_x_mul = shl nuw nsw i32 %t0, 1
%t4 = or i32 %ttile_origin.2, %tstart_offset_x_mul
%t6 = or i32 %t4, 1
%t8 = or i32 %t4, 128
%t4 = or disjoint i32 %ttile_origin.2, %tstart_offset_x_mul
%t6 = or disjoint i32 %t4, 1
%t8 = or disjoint i32 %t4, 128
%t9 = zext i32 %t8 to i64
%t10 = or i32 %t4, 129
%t10 = or disjoint i32 %t4, 129
%t11 = zext i32 %t10 to i64
%t20 = zext i32 %t2 to i64
%t27 = getelementptr inbounds [1024 x [131072 x i8]], ptr %alloc0, i64 0, i64 %t20, i64 %t9
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/mma-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ for.body: ; preds = %for.body, %for.body
%idxprom = zext i32 %mul to i64
%arrayidx = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom
%4 = load <16 x i8>, ptr %arrayidx, align 16
%add2 = or i32 %mul, 1
%add2 = or disjoint i32 %mul, 1
%idxprom3 = zext i32 %add2 to i64
%arrayidx4 = getelementptr inbounds <16 x i8>, ptr %vc, i64 %idxprom3
%5 = load <16 x i8>, ptr %arrayidx4, align 16
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/PowerPC/sched-addi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ entry:

vector.body:
%index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
%offset.idx = or i64 %index, 1
%offset.idx = or disjoint i64 %index, 1
%0 = getelementptr %_elem_type_of_x, ptr %x_rvo_based_addr_3, i64 %offset.idx, i32 0
%1 = getelementptr %_elem_type_of_a, ptr %a_rvo_based_addr_5, i64 %offset.idx, i32 0
%wide.load = load <4 x double>, ptr %1, align 8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ main_body:
%25 = getelementptr [0 x <8 x i32>], ptr addrspace(4) %1, i32 0, i32 %24, !amdgpu.uniform !0
%26 = load <8 x i32>, ptr addrspace(4) %25, align 32, !invariant.load !0
%27 = shl i32 %23, 2
%28 = or i32 %27, 3
%28 = or disjoint i32 %27, 3
%29 = getelementptr [0 x <4 x i32>], ptr addrspace(4) %1, i32 0, i32 %28, !amdgpu.uniform !0
%30 = load <4 x i32>, ptr addrspace(4) %29, align 16, !invariant.load !0
%31 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %30, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ define ptr @sext_or(i64 %a, i32 %b) {
;
entry:
%b1 = shl i32 %b, 2
%b2 = or i32 %b1, 1 ; (b << 2) and 1 have no common bits
%b2 = or disjoint i32 %b1, 1 ; (b << 2) and 1 have no common bits
%b3 = or i32 %b1, 4 ; (b << 2) and 4 may have common bits
%b2.ext = zext i32 %b2 to i64
%b3.ext = sext i32 %b3 to i64
Expand Down Expand Up @@ -335,7 +335,7 @@ define ptr @shl_add_or(i64 %a, ptr %ptr) {
entry:
%shl = shl i64 %a, 2
%add = add i64 %shl, 12
%or = or i64 %add, 1
%or = or disjoint i64 %add, 1
; ((a << 2) + 12) and 1 have no common bits. Therefore,
; SeparateConstOffsetFromGEP is able to extract the 12.
; TODO(jingyue): We could reassociate the expression to combine 12 and 1.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,17 @@ define void @testOrDoesntSplit(ptr %p) {
ret void
}

define void @testNoBitsInCommonOrSplits(ptr %p) {
; CHECK-LABEL: define void @testNoBitsInCommonOrSplits(
; COM: The check for `or disjoint` removed the old hasNoBitsInCommon()
; COM: check, ensure that failing to annotate an or with disjoint makes
; COM: the optimization fail.
define void @testNoBitsInCommonOrDoesntSplit(ptr %p) {
; CHECK-LABEL: define void @testNoBitsInCommonOrDoesntSplit(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
; CHECK-NEXT: [[VAR_HIGH:%.*]] = and i64 [[VAR]], -16
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR_HIGH]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: [[OFF:%.*]] = or i64 [[VAR_HIGH]], 10
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
; CHECK-NEXT: ret void
;
%var = tail call i64 @foo()
Expand All @@ -46,9 +47,11 @@ define void @testDisjointOrSplits(ptr %p) {
; CHECK-LABEL: define void @testDisjointOrSplits(
; CHECK-SAME: ptr [[P:%.*]]) {
; CHECK-NEXT: [[VAR:%.*]] = tail call i64 @foo()
; CHECK-NEXT: [[OFF:%.*]] = or disjoint i64 [[VAR]], 10
; CHECK-NEXT: [[Q:%.*]] = getelementptr i8, ptr [[P]], i64 [[OFF]]
; CHECK-NEXT: store i8 0, ptr [[Q]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], [[VAR]]
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 10
; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
; CHECK-NEXT: store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT: ret void
;
%var = tail call i64 @foo()
Expand Down