Skip to content

[X86][APX] Convert store(cmov(load(x), y), x) to cstore(y, x) #118946

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into llvm:main from the APX branch
Dec 7, 2024

Conversation

phoebewang
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Dec 6, 2024

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/118946.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+54)
  • (modified) llvm/test/CodeGen/X86/apx/cfcmov.ll (+41)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2d3bb2ece621e0..7c3d0ded4fce7f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2927,6 +2927,24 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
   }
 }
 
+static X86::CondCode getInvertedX86CC(X86::CondCode CC) {
+  switch (CC) {
+    // clang-format off
+  default: llvm_unreachable("Invalid integer condition!");
+  case X86::COND_E  : return X86::COND_NE;
+  case X86::COND_G  : return X86::COND_LE;
+  case X86::COND_GE : return X86::COND_L;
+  case X86::COND_L  : return X86::COND_GE;
+  case X86::COND_LE : return X86::COND_G;
+  case X86::COND_NE : return X86::COND_E;
+  case X86::COND_B  : return X86::COND_AE;
+  case X86::COND_A  : return X86::COND_BE;
+  case X86::COND_BE : return X86::COND_A;
+  case X86::COND_AE : return X86::COND_B;
+    // clang-format on
+  }
+}
+
 /// Do a one-to-one translation of a ISD::CondCode to the X86-specific
 /// condition code, returning the condition code and the LHS/RHS of the
 /// comparison to make.
@@ -52786,6 +52804,42 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
     }
   }
 
+  // Convert store(cmov(load(x), y), x) to cstore(y, x).
+  if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
+      Subtarget.hasCF() && St->isSimple()) {
+    SDValue Cmov;
+    if (StoredVal.getOpcode() == X86ISD::CMOV)
+      Cmov = StoredVal;
+    else if (StoredVal.getOpcode() == ISD::TRUNCATE &&
+             StoredVal.getOperand(0).getOpcode() == X86ISD::CMOV)
+      Cmov = StoredVal.getOperand(0);
+    else
+      return SDValue();
+
+    auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
+    if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->getBasePtr())
+      return SDValue();
+
+    bool InvertCC = false;
+    SDValue V = SDValue(Ld, 0);
+    if (V == Cmov.getOperand(1))
+      InvertCC = true;
+    else if (V != Cmov.getOperand(0))
+      return SDValue();
+
+    SDVTList Tys = DAG.getVTList(MVT::Other);
+    SDValue CC = Cmov.getOperand(2);
+    SDValue Src = DAG.getAnyExtOrTrunc(Cmov.getOperand(!InvertCC), dl, VT);
+    if (InvertCC)
+      CC = DAG.getTargetConstant(
+          getInvertedX86CC((X86::CondCode)Cmov.getConstantOperandVal(2)), dl,
+          MVT::i8);
+    SDValue Ops[] = {St->getChain(), Src, St->getBasePtr(), CC,
+                     Cmov.getOperand(3)};
+    return DAG.getMemIntrinsicNode(X86ISD::CSTORE, dl, Tys, Ops, VT,
+                                   St->getMemOperand());
+  }
+
   // Turn load->store of MMX types into GPR load/stores.  This avoids clobbering
   // the FP state in cases where an emms may be missing.
   // A preferable solution to the general problem is to figure out the right
diff --git a/llvm/test/CodeGen/X86/apx/cfcmov.ll b/llvm/test/CodeGen/X86/apx/cfcmov.ll
index f643120c9b50ff..22cd9534c9aafd 100644
--- a/llvm/test/CodeGen/X86/apx/cfcmov.ll
+++ b/llvm/test/CodeGen/X86/apx/cfcmov.ll
@@ -93,3 +93,44 @@ define i64 @cfcmov64rr_inv(i64 %0) {
   %3 = select i1 %2, i64 0, i64 %0
   ret i64 %3
 }
+
+define void @cfcmov16mr(ptr %p, i16 %0) {
+; CHECK-LABEL: cfcmov16mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    cmpw %ax, %si
+; CHECK-NEXT:    cfcmovlew %si, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i16, ptr %p, align 2
+  %3 = icmp sgt i16 %0, %2
+  %4 = select i1 %3, i16 %2, i16 %0
+  store i16 %4, ptr %p, align 2
+  ret void
+}
+
+define void @cfcmov32mr(ptr %p, i32 %0) {
+; CHECK-LABEL: cfcmov32mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl (%rdi), %esi
+; CHECK-NEXT:    cfcmovgl %esi, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i32, ptr %p, align 4
+  %3 = call i32 @llvm.smax.i32(i32 %0, i32 %2)
+  store i32 %3, ptr %p, align 4
+  ret void
+}
+
+define void @cfcmov64mr(ptr %p, i64 %0) {
+; CHECK-LABEL: cfcmov64mr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpq (%rdi), %rsi
+; CHECK-NEXT:    cfcmovgq %rsi, (%rdi)
+; CHECK-NEXT:    retq
+  %2 = load i64, ptr %p, align 2
+  %3 = icmp sgt i64 %0, %2
+  %4 = select i1 %3, i64 %0, i64 %2
+  store i64 %4, ptr %p, align 2
+  ret void
+}
+
+declare i32 @llvm.smax.i32(i32, i32)

@@ -2927,6 +2927,24 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
}
}

static X86::CondCode getInvertedX86CC(X86::CondCode CC) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have this in X86InstrInfo as X86::GetOppositeBranchCondition - can we share a helper in X86BaseInfo.h?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great, thanks! I had a quick search but didn't find it 🤦‍♀️

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one suggestion

@phoebewang phoebewang merged commit 943cc71 into llvm:main Dec 7, 2024
8 checks passed
@phoebewang phoebewang deleted the APX branch December 7, 2024 03:38
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants