Skip to content

Commit 0513487

Browse files
committed
[X86] Use Align in reduceMaskedLoadToScalarLoad/reduceMaskedStoreToScalarStore. Correct pointer info.
If we offset the pointer, we also need to offset the pointer info.

Differential Revision: https://reviews.llvm.org/D87593
1 parent d9c9a74 commit 0513487

File tree

2 files changed

+18
-11
lines changed

2 files changed

+18
-11
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44446,23 +44446,26 @@ static int getOneTrueElt(SDValue V) {
4444644446
/// scalar element, and the alignment for the scalar memory access.
4444744447
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
4444844448
SelectionDAG &DAG, SDValue &Addr,
44449-
SDValue &Index, unsigned &Alignment) {
44449+
SDValue &Index, Align &Alignment,
44450+
unsigned &Offset) {
4445044451
int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
4445144452
if (TrueMaskElt < 0)
4445244453
return false;
4445344454

4445444455
// Get the address of the one scalar element that is specified by the mask
4445544456
// using the appropriate offset from the base pointer.
4445644457
EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
44458+
Offset = 0;
4445744459
Addr = MaskedOp->getBasePtr();
4445844460
if (TrueMaskElt != 0) {
44459-
unsigned Offset = TrueMaskElt * EltVT.getStoreSize();
44461+
Offset = TrueMaskElt * EltVT.getStoreSize();
4446044462
Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
4446144463
SDLoc(MaskedOp));
4446244464
}
4446344465

4446444466
Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
44465-
Alignment = MinAlign(MaskedOp->getAlignment(), EltVT.getStoreSize());
44467+
Alignment = commonAlignment(MaskedOp->getOriginalAlign(),
44468+
EltVT.getStoreSize());
4446644469
return true;
4446744470
}
4446844471

@@ -44479,8 +44482,9 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
4447944482
// is profitable. Endianness would also have to be considered.
4448044483

4448144484
SDValue Addr, VecIndex;
44482-
unsigned Alignment;
44483-
if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment))
44485+
Align Alignment;
44486+
unsigned Offset;
44487+
if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
4448444488
return SDValue();
4448544489

4448644490
// Load the one scalar element that is specified by the mask using the
@@ -44489,7 +44493,8 @@ reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
4448944493
EVT VT = ML->getValueType(0);
4449044494
EVT EltVT = VT.getVectorElementType();
4449144495
SDValue Load =
44492-
DAG.getLoad(EltVT, DL, ML->getChain(), Addr, ML->getPointerInfo(),
44496+
DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
44497+
ML->getPointerInfo().getWithOffset(Offset),
4449344498
Alignment, ML->getMemOperand()->getFlags());
4449444499

4449544500
// Insert the loaded element into the appropriate place in the vector.
@@ -44600,8 +44605,9 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
4460044605
// is profitable. Endianness would also have to be considered.
4460144606

4460244607
SDValue Addr, VecIndex;
44603-
unsigned Alignment;
44604-
if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment))
44608+
Align Alignment;
44609+
unsigned Offset;
44610+
if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset))
4460544611
return SDValue();
4460644612

4460744613
// Extract the one scalar element that is actually being stored.
@@ -44612,7 +44618,8 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
4461244618
MS->getValue(), VecIndex);
4461344619

4461444620
// Store that element at the appropriate offset from the base pointer.
44615-
return DAG.getStore(MS->getChain(), DL, Extract, Addr, MS->getPointerInfo(),
44621+
return DAG.getStore(MS->getChain(), DL, Extract, Addr,
44622+
MS->getPointerInfo().getWithOffset(Offset),
4461644623
Alignment, MS->getMemOperand()->getFlags());
4461744624
}
4461844625

llvm/test/CodeGen/X86/vmaskmov-offset.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ define <2 x double> @mload_constmask_v2f64(<2 x double>* %addr, <2 x double> %ds
5959
; CHECK: liveins: $rdi, $xmm0
6060
; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
6161
; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
62-
; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr, align 4)
62+
; CHECK: [[VMOVHPDrm:%[0-9]+]]:vr128 = VMOVHPDrm [[COPY]], [[COPY1]], 1, $noreg, 8, $noreg :: (load 8 from %ir.addr + 8, align 4)
6363
; CHECK: $xmm0 = COPY [[VMOVHPDrm]]
6464
; CHECK: RET 0, $xmm0
6565
%res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1> <i1 0, i1 1>, <2 x double> %dst)
@@ -72,7 +72,7 @@ define void @one_mask_bit_set2(<4 x float>* %addr, <4 x float> %val) {
7272
; CHECK: liveins: $rdi, $xmm0
7373
; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0
7474
; CHECK: [[COPY1:%[0-9]+]]:gr64 = COPY $rdi
75-
; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr)
75+
; CHECK: VEXTRACTPSmr [[COPY1]], 1, $noreg, 8, $noreg, [[COPY]], 2 :: (store 4 into %ir.addr + 8)
7676
; CHECK: RET 0
7777
call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %val, <4 x float>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
7878
ret void

0 commit comments

Comments (0)