Skip to content

Commit 943cc71

Browse files
authored
[X86][APX] Convert store(cmov(load(x), y), x) to cstore(y, x) (#118946)
1 parent: bcf6f84 · commit: 943cc71

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52820,6 +52820,44 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
5282052820
}
5282152821
}
5282252822

52823+
// Convert store(cmov(load(p), x, CC), p) to cstore(x, p, CC)
52824+
// store(cmov(x, load(p), CC), p) to cstore(x, p, InvertCC)
52825+
if ((VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
52826+
Subtarget.hasCF() && St->isSimple()) {
52827+
SDValue Cmov;
52828+
if (StoredVal.getOpcode() == X86ISD::CMOV)
52829+
Cmov = StoredVal;
52830+
else if (StoredVal.getOpcode() == ISD::TRUNCATE &&
52831+
StoredVal.getOperand(0).getOpcode() == X86ISD::CMOV)
52832+
Cmov = StoredVal.getOperand(0);
52833+
else
52834+
return SDValue();
52835+
52836+
auto *Ld = dyn_cast<LoadSDNode>(St->getChain());
52837+
if (!Ld || !Ld->isSimple() || Ld->getBasePtr() != St->getBasePtr())
52838+
return SDValue();
52839+
52840+
bool InvertCC = false;
52841+
SDValue V = SDValue(Ld, 0);
52842+
if (V == Cmov.getOperand(1))
52843+
InvertCC = true;
52844+
else if (V != Cmov.getOperand(0))
52845+
return SDValue();
52846+
52847+
SDVTList Tys = DAG.getVTList(MVT::Other);
52848+
SDValue CC = Cmov.getOperand(2);
52849+
SDValue Src = DAG.getAnyExtOrTrunc(Cmov.getOperand(!InvertCC), dl, VT);
52850+
if (InvertCC)
52851+
CC = DAG.getTargetConstant(
52852+
GetOppositeBranchCondition(
52853+
(X86::CondCode)Cmov.getConstantOperandVal(2)),
52854+
dl, MVT::i8);
52855+
SDValue Ops[] = {St->getChain(), Src, St->getBasePtr(), CC,
52856+
Cmov.getOperand(3)};
52857+
return DAG.getMemIntrinsicNode(X86ISD::CSTORE, dl, Tys, Ops, VT,
52858+
St->getMemOperand());
52859+
}
52860+
5282352861
// Turn load->store of MMX types into GPR load/stores. This avoids clobbering
5282452862
// the FP state in cases where an emms may be missing.
5282552863
// A preferable solution to the general problem is to figure out the right

llvm/test/CodeGen/X86/apx/cfcmov.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,3 +93,91 @@ define i64 @cfcmov64rr_inv(i64 %0) {
9393
%3 = select i1 %2, i64 0, i64 %0
9494
ret i64 %3
9595
}
96+
97+
; Positive case (i16): smin(%0, *%p) stored back to the same pointer.
; The icmp sgt / select picks the loaded value when %0 is greater, so the
; store(cmov(load(p), x), p) pattern folds to a single conditional-faulting
; cfcmov store (cfcmovlew) instead of a cmov + mov sequence.
define void @cfcmov16mr(ptr %p, i16 %0) {
98+
; CHECK-LABEL: cfcmov16mr:
99+
; CHECK: # %bb.0:
100+
; CHECK-NEXT: movzwl (%rdi), %eax
101+
; CHECK-NEXT: cmpw %ax, %si
102+
; CHECK-NEXT: cfcmovlew %si, (%rdi)
103+
; CHECK-NEXT: retq
104+
%2 = load i16, ptr %p, align 2
105+
%3 = icmp sgt i16 %0, %2
106+
; Loaded value is the "true" operand, so the combine inverts the condition.
%4 = select i1 %3, i16 %2, i16 %0
107+
store i16 %4, ptr %p, align 2
108+
ret void
109+
}
110+
111+
; Positive case (i32): smax via the llvm.smax intrinsic, which lowers to
; cmp + cmov; the load/store of %p then folds into one cfcmovgl store.
define void @cfcmov32mr(ptr %p, i32 %0) {
112+
; CHECK-LABEL: cfcmov32mr:
113+
; CHECK: # %bb.0:
114+
; CHECK-NEXT: cmpl (%rdi), %esi
115+
; CHECK-NEXT: cfcmovgl %esi, (%rdi)
116+
; CHECK-NEXT: retq
117+
%2 = load i32, ptr %p, align 4
118+
%3 = call i32 @llvm.smax.i32(i32 %0, i32 %2)
119+
store i32 %3, ptr %p, align 4
120+
ret void
121+
}
122+
123+
; Positive case (i64): signed-max written in explicit icmp/select form.
; The select's "true" operand is %0 (not the load), so no condition
; inversion is needed and the fold emits cfcmovgq directly.
define void @cfcmov64mr(ptr %p, i64 %0) {
124+
; CHECK-LABEL: cfcmov64mr:
125+
; CHECK: # %bb.0:
126+
; CHECK-NEXT: cmpq (%rdi), %rsi
127+
; CHECK-NEXT: cfcmovgq %rsi, (%rdi)
128+
; CHECK-NEXT: retq
129+
%2 = load i64, ptr %p, align 8
130+
%3 = icmp sgt i64 %0, %2
131+
%4 = select i1 %3, i64 %0, i64 %2
132+
store i64 %4, ptr %p, align 8
133+
ret void
134+
}
135+
136+
; Negative case: the load is volatile (not a "simple" load), so the
; combine must NOT fire. CHECK lines verify the unfused sequence:
; separate load, cmov, and store.
define void @volatileload(ptr %p, i32 %0) {
137+
; CHECK-LABEL: volatileload:
138+
; CHECK: # %bb.0:
139+
; CHECK-NEXT: movl (%rdi), %eax
140+
; CHECK-NEXT: cmpl %eax, %esi
141+
; CHECK-NEXT: cmovbl %esi, %eax
142+
; CHECK-NEXT: movl %eax, (%rdi)
143+
; CHECK-NEXT: retq
144+
%2 = load volatile i32, ptr %p, align 4
145+
%3 = call i32 @llvm.umin.i32(i32 %0, i32 %2)
146+
store i32 %3, ptr %p, align 4
147+
ret void
148+
}
149+
150+
; Negative case: the store is atomic (not a "simple" store), so the
; combine must NOT fire; a plain cmov + movq sequence is expected.
define void @atomicstore(ptr %p, i64 %0) {
151+
; CHECK-LABEL: atomicstore:
152+
; CHECK: # %bb.0:
153+
; CHECK-NEXT: movq (%rdi), %rax
154+
; CHECK-NEXT: cmpq %rax, %rsi
155+
; CHECK-NEXT: cmovaq %rsi, %rax
156+
; CHECK-NEXT: movq %rax, (%rdi)
157+
; CHECK-NEXT: retq
158+
%2 = load i64, ptr %p, align 8
159+
%3 = icmp ugt i64 %0, %2
160+
%4 = select i1 %3, i64 %0, i64 %2
161+
; unordered atomic store — disqualifies the cstore fold.
store atomic i64 %4, ptr %p unordered, align 8
162+
ret void
163+
}
164+
165+
; Negative case: load reads element 0 but the store writes element 1, so
; the base pointers differ and the combine must NOT fire; the store stays
; a separate movl to 4(%rdi).
define void @loadstorediffptr(ptr %p, i32 %0) {
166+
; CHECK-LABEL: loadstorediffptr:
167+
; CHECK: # %bb.0:
168+
; CHECK-NEXT: movl (%rdi), %eax
169+
; CHECK-NEXT: cmpl %eax, %esi
170+
; CHECK-NEXT: cmovbel %eax, %esi
171+
; CHECK-NEXT: movl %esi, 4(%rdi)
172+
; CHECK-NEXT: retq
173+
%2 = getelementptr [2 x i32], ptr %p, i32 0, i32 0
174+
%3 = load i32, ptr %2, align 4
175+
%4 = icmp ule i32 %0, %3
176+
%5 = select i1 %4, i32 %3, i32 %0
177+
; Different address than the load above (%p + 4 bytes).
%6 = getelementptr [2 x i32], ptr %p, i32 0, i32 1
178+
store i32 %5, ptr %6, align 4
179+
ret void
180+
}
181+
182+
declare i32 @llvm.smax.i32(i32, i32)
183+
declare i32 @llvm.umin.i32(i32, i32)

0 commit comments

Comments
 (0)