-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[X86][APX] Convert store(cmov(load(x), y), x) to cstore(y, x) #118946
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) — Changes. Full diff: https://github.com/llvm/llvm-project/pull/118946.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2d3bb2ece621e0..7c3d0ded4fce7f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2927,6 +2927,24 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
}
}
+// Return the X86 condition code that tests the logical opposite of \p CC
+// (e.g. COND_E <-> COND_NE, COND_G <-> COND_LE). Only the integer condition
+// codes produced by TranslateIntegerX86CC are handled; any other input is
+// unreachable.
+// NOTE(review): X86::GetOppositeBranchCondition in X86InstrInfo implements
+// the same mapping -- consider sharing a single helper in X86BaseInfo.h
+// instead of duplicating the table here.
+static X86::CondCode getInvertedX86CC(X86::CondCode CC) {
+ switch (CC) {
+ // clang-format off
+ default: llvm_unreachable("Invalid integer condition!");
+ case X86::COND_E : return X86::COND_NE;
+ case X86::COND_G : return X86::COND_LE;
+ case X86::COND_GE : return X86::COND_L;
+ case X86::COND_L : return X86::COND_GE;
+ case X86::COND_LE : return X86::COND_G;
+ case X86::COND_NE : return X86::COND_E;
+ case X86::COND_B : return X86::COND_AE;
+ case X86::COND_A : return X86::COND_BE;
+ case X86::COND_BE : return X86::COND_A;
+ case X86::COND_AE : return X86::COND_B;
+ // clang-format on
+ }
+}
+
/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
@@ -52786,6 +52804,42 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
}
}
+ // Convert store(cmov(load(x), y), x) to cstore(y, x).
+ // With APX conditional-faulting support (hasCF), a load/cmov/store round
+ // trip through the same address can be folded into a single conditional
+ // store that only writes when the condition holds. Restricted to the GPR
+ // widths CFCMOV supports and to simple (non-volatile, non-atomic) accesses.
+ if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+ Subtarget.hasCF() && St->isSimple()) {
+ SDValue Cmov;
+ if (StoredVal.getOpcode() == X86ISD::CMOV)
+ Cmov = StoredVal;
+ else if (StoredVal.getOpcode() == ISD::TRUNCATE &&
+ StoredVal.getOperand(0).getOpcode() == X86ISD::CMOV)
+ // Also accept a truncated cmov result; Src is narrowed back to the
+ // store's type below via getAnyExtOrTrunc.
+ Cmov = StoredVal.getOperand(0);
+ else
+ return SDValue();
+
+ // The load must be the store's immediate chain predecessor (so no other
+ // memory operation can intervene), simple, and read the same address.
+ auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
+ if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->getBasePtr())
+ return SDValue();
+
+ // Identify which cmov operand is the loaded (old) value. X86ISD::CMOV
+ // yields operand 1 when the condition holds; if the load is operand 1 the
+ // store only changes memory when the condition fails, so we store the
+ // other operand under the inverted condition.
+ bool InvertCC = false;
+ SDValue V = SDValue(Ld, 0);
+ if (V == Cmov.getOperand(1))
+ InvertCC = true;
+ else if (V != Cmov.getOperand(0))
+ return SDValue();
+
+ SDVTList Tys = DAG.getVTList(MVT::Other);
+ SDValue CC = Cmov.getOperand(2);
+ // Src is whichever cmov operand is NOT the load, adjusted to the store's
+ // type (matters for the truncate case above).
+ SDValue Src = DAG.getAnyExtOrTrunc(Cmov.getOperand(!InvertCC), dl, VT);
+ if (InvertCC)
+ CC = DAG.getTargetConstant(
+ getInvertedX86CC((X86::CondCode)Cmov.getConstantOperandVal(2)), dl,
+ MVT::i8);
+ // CSTORE operands: chain, value to store, address, condition code, EFLAGS.
+ SDValue Ops[] = {St->getChain(), Src, St->getBasePtr(), CC,
+ Cmov.getOperand(3)};
+ return DAG.getMemIntrinsicNode(X86ISD::CSTORE, dl, Tys, Ops, VT,
+ St->getMemOperand());
+ }
+
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
// the FP state in cases where an emms may be missing.
// A preferable solution to the general problem is to figure out the right
diff --git a/llvm/test/CodeGen/X86/apx/cfcmov.ll b/llvm/test/CodeGen/X86/apx/cfcmov.ll
index f643120c9b50ff..22cd9534c9aafd 100644
--- a/llvm/test/CodeGen/X86/apx/cfcmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cfcmov.ll
@@ -93,3 +93,44 @@ define i64 @cfcmov64rr_inv(i64 %0) {
%3 = select i1 %2, i64 0, i64 %0
ret i64 %3
}
+
+; i16 case: store(select(%0 > *p, *p, %0), %p). The load is the select's
+; "true" value, so the combine must invert the condition: sgt becomes
+; cfcmovle (store %0 when it is NOT greater than the loaded value).
+define void @cfcmov16mr(ptr %p, i16 %0) {
+; CHECK-LABEL: cfcmov16mr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: cmpw %ax, %si
+; CHECK-NEXT: cfcmovlew %si, (%rdi)
+; CHECK-NEXT: retq
+ %2 = load i16, ptr %p, align 2
+ %3 = icmp sgt i16 %0, %2
+ %4 = select i1 %3, i16 %2, i16 %0
+ store i16 %4, ptr %p, align 2
+ ret void
+}
+
+; i32 case via the smax intrinsic: *p = max(%0, *p). Here the stored value
+; is %0 when the condition holds, so no inversion is needed (cfcmovg).
+define void @cfcmov32mr(ptr %p, i32 %0) {
+; CHECK-LABEL: cfcmov32mr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpl (%rdi), %esi
+; CHECK-NEXT: cfcmovgl %esi, (%rdi)
+; CHECK-NEXT: retq
+ %2 = load i32, ptr %p, align 4
+ %3 = call i32 @llvm.smax.i32(i32 %0, i32 %2)
+ store i32 %3, ptr %p, align 4
+ ret void
+}
+
+; i64 case: store(select(%0 > *p, %0, *p), %p) -- load is the "false" value,
+; so the condition is used unchanged (cfcmovg).
+; NOTE(review): `align 2` on an i64 load/store looks copy-pasted from the i16
+; test -- confirm this is intentional (natural alignment would be `align 8`).
+define void @cfcmov64mr(ptr %p, i64 %0) {
+; CHECK-LABEL: cfcmov64mr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: cmpq (%rdi), %rsi
+; CHECK-NEXT: cfcmovgq %rsi, (%rdi)
+; CHECK-NEXT: retq
+ %2 = load i64, ptr %p, align 2
+ %3 = icmp sgt i64 %0, %2
+ %4 = select i1 %3, i64 %0, i64 %2
+ store i64 %4, ptr %p, align 2
+ ret void
+}
+
+declare i32 @llvm.smax.i32(i32, i32)
|
@@ -2927,6 +2927,24 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { | |||
} | |||
} | |||
|
|||
static X86::CondCode getInvertedX86CC(X86::CondCode CC) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We already have this in X86InstroInfo as X86::GetOppositeBranchCondition - can we share a helper in X86BaseInfo.h?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Great, thanks! I had a quick search but didn't find it 🤦‍♀️
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one suggestion
No description provided.