Skip to content

Commit 796b0c1

Browse files
committed
fix inlining tests and idempotent
1 parent beec8ab commit 796b0c1

File tree

5 files changed

+817
-94
lines changed

5 files changed

+817
-94
lines changed

llvm/lib/Target/X86/X86.td

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,8 @@ def ProcessorFeatures {
819819
TuningSlow3OpsLEA,
820820
TuningSlowDivide64,
821821
TuningSlowIncDec,
822-
TuningInsertVZEROUPPER
822+
TuningInsertVZEROUPPER,
823+
TuningAvoidMFENCE
823824
];
824825

825826
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
@@ -835,7 +836,8 @@ def ProcessorFeatures {
835836
TuningFastSHLDRotate,
836837
TuningFast15ByteNOP,
837838
TuningPOPCNTFalseDeps,
838-
TuningInsertVZEROUPPER
839+
TuningInsertVZEROUPPER,
840+
TuningAvoidMFENCE
839841
];
840842

841843
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
@@ -854,7 +856,8 @@ def ProcessorFeatures {
854856
TuningPOPCNTFalseDeps,
855857
TuningLZCNTFalseDeps,
856858
TuningInsertVZEROUPPER,
857-
TuningAllowLight256Bit
859+
TuningAllowLight256Bit,
860+
TuningAvoidMFENCE
858861
];
859862

860863
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
@@ -878,7 +881,8 @@ def ProcessorFeatures {
878881
TuningFastGather,
879882
TuningPOPCNTFalseDeps,
880883
TuningInsertVZEROUPPER,
881-
TuningAllowLight256Bit
884+
TuningAllowLight256Bit,
885+
TuningAvoidMFENCE
882886
];
883887

884888
// Nehalem
@@ -1677,8 +1681,7 @@ def : ProcModel<"nocona", GenericPostRAModel, [
16771681
],
16781682
[
16791683
TuningSlowUAMem16,
1680-
TuningInsertVZEROUPPER,
1681-
TuningAvoidMFENCE
1684+
TuningInsertVZEROUPPER
16821685
]>;
16831686

16841687
// Intel Core 2 Solo/Duo.

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31001,21 +31001,10 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
3100131001
// otherwise, we might be able to be more aggressive on relaxed idempotent
3100231002
// rmw. In practice, they do not look useful, so we don't try to be
3100331003
// especially clever.
31004-
if (SSID == SyncScope::SingleThread)
31005-
// FIXME: we could just insert an ISD::MEMBARRIER here, except we are at
31006-
// the IR level, so we must wrap it in an intrinsic.
31007-
return nullptr;
31008-
31009-
if (!Subtarget.hasMFence())
31010-
// FIXME: it might make sense to use a locked operation here but on a
31011-
// different cache-line to prevent cache-line bouncing. In practice it
31012-
// is probably a small win, and x86 processors without mfence are rare
31013-
// enough that we do not bother.
31014-
return nullptr;
3101531004

31016-
Function *MFence =
31017-
llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
31018-
Builder.CreateCall(MFence, {});
31005+
// Use `fence seq_cst` over `llvm.x86.sse2.mfence` here to get the correct
31006+
// lowering for SSID == SyncScope::SingleThread and avoidMFence || !hasMFence
31007+
Builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SSID);
3101931008

3102031009
// Finally we can emit the atomic load.
3102131010
LoadInst *Loaded = Builder.CreateAlignedLoad(

llvm/test/CodeGen/X86/atomic-idempotent.ll

Lines changed: 30 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -27,18 +27,16 @@ define i8 @add8(ptr %p) {
2727
;
2828
; X86-SLM-LABEL: add8:
2929
; X86-SLM: # %bb.0:
30-
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
31-
; X86-SLM-NEXT: xorl %eax, %eax
32-
; X86-SLM-NEXT: lock xaddb %al, (%ecx)
33-
; X86-SLM-NEXT: # kill: def $al killed $al killed $eax
30+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
31+
; X86-SLM-NEXT: lock orl $0, (%esp)
32+
; X86-SLM-NEXT: movzbl (%eax), %eax
3433
; X86-SLM-NEXT: retl
3534
;
3635
; X86-ATOM-LABEL: add8:
3736
; X86-ATOM: # %bb.0:
38-
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
39-
; X86-ATOM-NEXT: xorl %eax, %eax
40-
; X86-ATOM-NEXT: lock xaddb %al, (%ecx)
41-
; X86-ATOM-NEXT: # kill: def $al killed $al killed $eax
37+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
38+
; X86-ATOM-NEXT: lock orl $0, (%esp)
39+
; X86-ATOM-NEXT: movzbl (%eax), %eax
4240
; X86-ATOM-NEXT: nop
4341
; X86-ATOM-NEXT: nop
4442
; X86-ATOM-NEXT: retl
@@ -62,26 +60,18 @@ define i16 @or16(ptr %p) {
6260
;
6361
; X86-SLM-LABEL: or16:
6462
; X86-SLM: # %bb.0:
65-
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
66-
; X86-SLM-NEXT: movzwl (%ecx), %eax
67-
; X86-SLM-NEXT: .p2align 4, 0x90
68-
; X86-SLM-NEXT: .LBB1_1: # %atomicrmw.start
69-
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
70-
; X86-SLM-NEXT: lock cmpxchgw %ax, (%ecx)
71-
; X86-SLM-NEXT: jne .LBB1_1
72-
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
63+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
64+
; X86-SLM-NEXT: lock orl $0, (%esp)
65+
; X86-SLM-NEXT: movzwl (%eax), %eax
7366
; X86-SLM-NEXT: retl
7467
;
7568
; X86-ATOM-LABEL: or16:
7669
; X86-ATOM: # %bb.0:
77-
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
78-
; X86-ATOM-NEXT: movzwl (%ecx), %eax
79-
; X86-ATOM-NEXT: .p2align 4, 0x90
80-
; X86-ATOM-NEXT: .LBB1_1: # %atomicrmw.start
81-
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
82-
; X86-ATOM-NEXT: lock cmpxchgw %ax, (%ecx)
83-
; X86-ATOM-NEXT: jne .LBB1_1
84-
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
70+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
71+
; X86-ATOM-NEXT: lock orl $0, (%esp)
72+
; X86-ATOM-NEXT: movzwl (%eax), %eax
73+
; X86-ATOM-NEXT: nop
74+
; X86-ATOM-NEXT: nop
8575
; X86-ATOM-NEXT: retl
8676
%1 = atomicrmw or ptr %p, i16 0 acquire
8777
ret i16 %1
@@ -103,26 +93,18 @@ define i32 @xor32(ptr %p) {
10393
;
10494
; X86-SLM-LABEL: xor32:
10595
; X86-SLM: # %bb.0:
106-
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
107-
; X86-SLM-NEXT: movl (%ecx), %eax
108-
; X86-SLM-NEXT: .p2align 4, 0x90
109-
; X86-SLM-NEXT: .LBB2_1: # %atomicrmw.start
110-
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
111-
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
112-
; X86-SLM-NEXT: jne .LBB2_1
113-
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
96+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
97+
; X86-SLM-NEXT: lock orl $0, (%esp)
98+
; X86-SLM-NEXT: movl (%eax), %eax
11499
; X86-SLM-NEXT: retl
115100
;
116101
; X86-ATOM-LABEL: xor32:
117102
; X86-ATOM: # %bb.0:
118-
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
119-
; X86-ATOM-NEXT: movl (%ecx), %eax
120-
; X86-ATOM-NEXT: .p2align 4, 0x90
121-
; X86-ATOM-NEXT: .LBB2_1: # %atomicrmw.start
122-
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
123-
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
124-
; X86-ATOM-NEXT: jne .LBB2_1
125-
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
103+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
104+
; X86-ATOM-NEXT: lock orl $0, (%esp)
105+
; X86-ATOM-NEXT: movl (%eax), %eax
106+
; X86-ATOM-NEXT: nop
107+
; X86-ATOM-NEXT: nop
126108
; X86-ATOM-NEXT: retl
127109
%1 = atomicrmw xor ptr %p, i32 0 release
128110
ret i32 %1
@@ -318,26 +300,18 @@ define i32 @and32 (ptr %p) {
318300
;
319301
; X86-SLM-LABEL: and32:
320302
; X86-SLM: # %bb.0:
321-
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx
322-
; X86-SLM-NEXT: movl (%ecx), %eax
323-
; X86-SLM-NEXT: .p2align 4, 0x90
324-
; X86-SLM-NEXT: .LBB5_1: # %atomicrmw.start
325-
; X86-SLM-NEXT: # =>This Inner Loop Header: Depth=1
326-
; X86-SLM-NEXT: lock cmpxchgl %eax, (%ecx)
327-
; X86-SLM-NEXT: jne .LBB5_1
328-
; X86-SLM-NEXT: # %bb.2: # %atomicrmw.end
303+
; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
304+
; X86-SLM-NEXT: lock orl $0, (%esp)
305+
; X86-SLM-NEXT: movl (%eax), %eax
329306
; X86-SLM-NEXT: retl
330307
;
331308
; X86-ATOM-LABEL: and32:
332309
; X86-ATOM: # %bb.0:
333-
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx
334-
; X86-ATOM-NEXT: movl (%ecx), %eax
335-
; X86-ATOM-NEXT: .p2align 4, 0x90
336-
; X86-ATOM-NEXT: .LBB5_1: # %atomicrmw.start
337-
; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1
338-
; X86-ATOM-NEXT: lock cmpxchgl %eax, (%ecx)
339-
; X86-ATOM-NEXT: jne .LBB5_1
340-
; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end
310+
; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
311+
; X86-ATOM-NEXT: lock orl $0, (%esp)
312+
; X86-ATOM-NEXT: movl (%eax), %eax
313+
; X86-ATOM-NEXT: nop
314+
; X86-ATOM-NEXT: nop
341315
; X86-ATOM-NEXT: retl
342316
%1 = atomicrmw and ptr %p, i32 -1 acq_rel
343317
ret i32 %1

0 commit comments

Comments
 (0)