Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit d8c108c

Browse files
committed
Merging r322372 and r322767:
------------------------------------------------------------------------ r322372 | nemanjai | 2018-01-12 15:58:41 +0100 (Fri, 12 Jan 2018) | 10 lines [PowerPC] Zero-extend the compare operand for ATOMIC_CMP_SWAP Part of the fix for https://bugs.llvm.org/show_bug.cgi?id=35812. This patch ensures that the compare operand for the atomic compare and swap is properly zero-extended to 32 bits if applicable. A follow-up commit will fix the extension for the SETCC node generated when expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS. That will complete the bug fix. Differential Revision: https://reviews.llvm.org/D41856 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r322767 | efriedma | 2018-01-17 23:04:36 +0100 (Wed, 17 Jan 2018) | 12 lines [LegalizeDAG] Fix ATOMIC_CMP_SWAP_WITH_SUCCESS legalization. The code wasn't zero-extending correctly, so the comparison could spuriously fail. Adds some AArch64 tests to cover this case. Inspired by D41791. Differential Revision: https://reviews.llvm.org/D41798 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_60@323334 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent d952162 commit d8c108c

File tree

9 files changed

+240
-11
lines changed

9 files changed

+240
-11
lines changed

lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2965,12 +2965,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
29652965
case ISD::ZERO_EXTEND:
29662966
LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res,
29672967
DAG.getValueType(AtomicType));
2968-
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
2968+
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
29692969
ExtRes = LHS;
29702970
break;
29712971
case ISD::ANY_EXTEND:
29722972
LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType);
2973-
RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2));
2973+
RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType);
29742974
break;
29752975
default:
29762976
llvm_unreachable("Invalid atomic op extension");

lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142142
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
143143
setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
144144

145+
// Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
146+
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
147+
145148
// PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
146149
for (MVT VT : MVT::integer_valuetypes()) {
147150
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
@@ -1154,6 +1157,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
11541157
case PPCISD::Hi: return "PPCISD::Hi";
11551158
case PPCISD::Lo: return "PPCISD::Lo";
11561159
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1160+
case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1161+
case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
11571162
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
11581163
case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
11591164
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
@@ -8834,6 +8839,42 @@ SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
88348839
return Op;
88358840
}
88368841

8842+
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
8843+
// compared to a value that is atomically loaded (atomic loads zero-extend).
8844+
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
8845+
SelectionDAG &DAG) const {
8846+
assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
8847+
"Expecting an atomic compare-and-swap here.");
8848+
SDLoc dl(Op);
8849+
auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
8850+
EVT MemVT = AtomicNode->getMemoryVT();
8851+
if (MemVT.getSizeInBits() >= 32)
8852+
return Op;
8853+
8854+
SDValue CmpOp = Op.getOperand(2);
8855+
// If this is already correctly zero-extended, leave it alone.
8856+
auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
8857+
if (DAG.MaskedValueIsZero(CmpOp, HighBits))
8858+
return Op;
8859+
8860+
// Clear the high bits of the compare operand.
8861+
unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
8862+
SDValue NewCmpOp =
8863+
DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
8864+
DAG.getConstant(MaskVal, dl, MVT::i32));
8865+
8866+
// Replace the existing compare operand with the properly zero-extended one.
8867+
SmallVector<SDValue, 4> Ops;
8868+
for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
8869+
Ops.push_back(AtomicNode->getOperand(i));
8870+
Ops[2] = NewCmpOp;
8871+
MachineMemOperand *MMO = AtomicNode->getMemOperand();
8872+
SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
8873+
auto NodeTy =
8874+
(MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
8875+
return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
8876+
}
8877+
88378878
SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
88388879
SelectionDAG &DAG) const {
88398880
SDLoc dl(Op);
@@ -9325,6 +9366,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
93259366
return LowerREM(Op, DAG);
93269367
case ISD::BSWAP:
93279368
return LowerBSWAP(Op, DAG);
9369+
case ISD::ATOMIC_CMP_SWAP:
9370+
return LowerATOMIC_CMP_SWAP(Op, DAG);
93289371
}
93299372
}
93309373

lib/Target/PowerPC/PPCISelLowering.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,11 @@ namespace llvm {
430430
/// The 4xf32 load used for v4i1 constants.
431431
QVLFSb,
432432

433+
/// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
434+
/// except they ensure that the compare input is zero-extended for
435+
/// sub-word versions because the atomic loads zero-extend.
436+
ATOMIC_CMP_SWAP_8, ATOMIC_CMP_SWAP_16,
437+
433438
/// GPRC = TOC_ENTRY GA, TOC
434439
/// Loads the entry for GA from the TOC, where the TOC base is given by
435440
/// the last operand.
@@ -955,6 +960,7 @@ namespace llvm {
955960
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
956961
SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const;
957962
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
963+
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
958964
SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
959965
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
960966
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;

lib/Target/PowerPC/PPCInstrInfo.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,13 @@ def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
257257
def PPCcondbranch : SDNode<"PPCISD::COND_BRANCH", SDT_PPCcondbr,
258258
[SDNPHasChain, SDNPOptInGlue]>;
259259

260+
// PPC-specific atomic operations.
261+
def PPCatomicCmpSwap_8 :
262+
SDNode<"PPCISD::ATOMIC_CMP_SWAP_8", SDTAtomic3,
263+
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
264+
def PPCatomicCmpSwap_16 :
265+
SDNode<"PPCISD::ATOMIC_CMP_SWAP_16", SDTAtomic3,
266+
[SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
260267
def PPClbrx : SDNode<"PPCISD::LBRX", SDT_PPClbrx,
261268
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
262269
def PPCstbrx : SDNode<"PPCISD::STBRX", SDT_PPCstbrx,
@@ -1710,6 +1717,11 @@ let usesCustomInserter = 1 in {
17101717
}
17111718
}
17121719

1720+
def : Pat<(PPCatomicCmpSwap_8 xoaddr:$ptr, i32:$old, i32:$new),
1721+
(ATOMIC_CMP_SWAP_I8 xoaddr:$ptr, i32:$old, i32:$new)>;
1722+
def : Pat<(PPCatomicCmpSwap_16 xoaddr:$ptr, i32:$old, i32:$new),
1723+
(ATOMIC_CMP_SWAP_I16 xoaddr:$ptr, i32:$old, i32:$new)>;
1724+
17131725
// Instructions to support atomic operations
17141726
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
17151727
def LBARX : XForm_1<31, 52, (outs gprc:$rD), (ins memrr:$src),

test/CodeGen/AArch64/atomic-ops-lse.ll

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -629,12 +629,27 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
629629

630630
; CHECK-NOT: dmb
631631
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
632-
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
632+
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
633+
; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
634+
; CHECK-NEXT: ret
635+
636+
ret i8 %old
637+
}
638+
639+
define i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
640+
; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
641+
%pair = cmpxchg i8* @var8, i8 %wanted, i8 %new acquire acquire
642+
%success = extractvalue { i8, i1 } %pair, 1
633643

634-
; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]]
635644
; CHECK-NOT: dmb
645+
; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
646+
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
636647

637-
ret i8 %old
648+
; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
649+
; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
650+
; CHECK-NEXT: cset w0, eq
651+
; CHECK-NEXT: ret
652+
ret i1 %success
638653
}
639654

640655
define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
@@ -644,12 +659,28 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
644659

645660
; CHECK-NOT: dmb
646661
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
647-
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
662+
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
663+
; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
664+
; CHECK-NEXT: ret
665+
666+
ret i16 %old
667+
}
668+
669+
define i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
670+
; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
671+
%pair = cmpxchg i16* @var16, i16 %wanted, i16 %new acquire acquire
672+
%success = extractvalue { i16, i1 } %pair, 1
648673

649-
; CHECK: casah w0, w1, [x[[ADDR]]]
650674
; CHECK-NOT: dmb
675+
; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
676+
; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
651677

652-
ret i16 %old
678+
; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
679+
; CHECK-NEXT: cmp w[[NEW]], w0, uxth
680+
; CHECK-NEXT: cset w0, eq
681+
; CHECK-NEXT: ret
682+
683+
ret i1 %success
653684
}
654685

655686
define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {

test/CodeGen/ARM/atomic-cmpxchg.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,10 @@ entry:
4949
; CHECK-THUMBV6: mov [[EXPECTED:r[0-9]+]], r1
5050
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
5151
; CHECK-THUMBV6-NEXT: mov [[RES:r[0-9]+]], r0
52+
; CHECK-THUMBV6-NEXT: uxtb [[EXPECTED_ZEXT:r[0-9]+]], [[EXPECTED]]
5253
; CHECK-THUMBV6-NEXT: movs r0, #1
5354
; CHECK-THUMBV6-NEXT: movs [[ZERO:r[0-9]+]], #0
54-
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED]]
55+
; CHECK-THUMBV6-NEXT: cmp [[RES]], [[EXPECTED_ZEXT]]
5556
; CHECK-THUMBV6-NEXT: beq [[END:.LBB[0-9_]+]]
5657
; CHECK-THUMBV6-NEXT: mov r0, [[ZERO]]
5758
; CHECK-THUMBV6-NEXT: [[END]]:

test/CodeGen/ARM/cmpxchg-O0.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
1717
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
1818
; CHECK: bne [[RETRY]]
1919
; CHECK: [[DONE]]:
20-
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
20+
; CHECK: uxtb [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
21+
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
2122
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
2223
; CHECK: dmb ish
2324
%res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
@@ -36,7 +37,8 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind
3637
; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
3738
; CHECK: bne [[RETRY]]
3839
; CHECK: [[DONE]]:
39-
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
40+
; CHECK: uxth [[DESIRED_ZEXT:r[0-9]+]], [[DESIRED]]
41+
; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED_ZEXT]]
4042
; CHECK: {{moveq|movweq}} {{r[0-9]+}}, #1
4143
; CHECK: dmb ish
4244
%res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; Make sure that a negative value for the compare-and-swap is zero extended
3+
; from i8/i16 to i32 since it will be compared for equality.
4+
; RUN: llc -mtriple=powerpc64le-linux-gnu -verify-machineinstrs < %s | FileCheck %s
5+
; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr7 < %s | FileCheck %s --check-prefix=CHECK-P7
6+
7+
@str = private unnamed_addr constant [46 x i8] c"FAILED: __atomic_compare_exchange_n() failed.\00"
8+
@str.1 = private unnamed_addr constant [59 x i8] c"FAILED: __atomic_compare_exchange_n() set the wrong value.\00"
9+
@str.2 = private unnamed_addr constant [7 x i8] c"PASSED\00"
10+
11+
define signext i32 @main() {
12+
; CHECK-LABEL: main:
13+
; CHECK: li 3, -32477
14+
; CHECK: lis 12, 0
15+
; CHECK: li 6, 234
16+
; CHECK: sth 3, 46(1)
17+
; CHECK: ori 4, 12, 33059
18+
; CHECK: sync
19+
; CHECK: .LBB0_1: # %L.entry
20+
; CHECK: lharx 3, 0, 5
21+
; CHECK: cmpw 4, 3
22+
; CHECK: bne 0, .LBB0_3
23+
; CHECK: sthcx. 6, 0, 5
24+
; CHECK: bne 0, .LBB0_1
25+
; CHECK: b .LBB0_4
26+
; CHECK: .LBB0_3: # %L.entry
27+
; CHECK: sthcx. 3, 0, 5
28+
; CHECK: .LBB0_4: # %L.entry
29+
; CHECK: cmplwi 3, 33059
30+
; CHECK: lwsync
31+
; CHECK: lhz 3, 46(1)
32+
; CHECK: cmplwi 3, 234
33+
;
34+
; CHECK-P7-LABEL: main:
35+
; CHECK-P7: lis 4, 0
36+
; CHECK-P7: li 7, 0
37+
; CHECK-P7: li 3, -32477
38+
; CHECK-P7: sth 3, 46(1)
39+
; CHECK-P7: li 5, 234
40+
; CHECK-P7: ori 4, 4, 33059
41+
; CHECK-P7: rlwinm 3, 6, 3, 27, 27
42+
; CHECK-P7: ori 7, 7, 65535
43+
; CHECK-P7: sync
44+
; CHECK-P7: slw 8, 5, 3
45+
; CHECK-P7: slw 5, 7, 3
46+
; CHECK-P7: slw 9, 4, 3
47+
; CHECK-P7: and 7, 8, 5
48+
; CHECK-P7: rldicr 4, 6, 0, 61
49+
; CHECK-P7: and 8, 9, 5
50+
; CHECK-P7: .LBB0_1: # %L.entry
51+
; CHECK-P7: lwarx 9, 0, 4
52+
; CHECK-P7: and 6, 9, 5
53+
; CHECK-P7: cmpw 0, 6, 8
54+
; CHECK-P7: bne 0, .LBB0_3
55+
; CHECK-P7: andc 9, 9, 5
56+
; CHECK-P7: or 9, 9, 7
57+
; CHECK-P7: stwcx. 9, 0, 4
58+
; CHECK-P7: bne 0, .LBB0_1
59+
; CHECK-P7: b .LBB0_4
60+
; CHECK-P7: .LBB0_3: # %L.entry
61+
; CHECK-P7: stwcx. 9, 0, 4
62+
; CHECK-P7: .LBB0_4: # %L.entry
63+
; CHECK-P7: srw 3, 6, 3
64+
; CHECK-P7: lwsync
65+
; CHECK-P7: cmplwi 3, 33059
66+
; CHECK-P7: lhz 3, 46(1)
67+
; CHECK-P7: cmplwi 3, 234
68+
L.entry:
69+
%value.addr = alloca i16, align 2
70+
store i16 -32477, i16* %value.addr, align 2
71+
%0 = cmpxchg i16* %value.addr, i16 -32477, i16 234 seq_cst seq_cst
72+
%1 = extractvalue { i16, i1 } %0, 1
73+
br i1 %1, label %L.B0000, label %L.B0003
74+
75+
L.B0003: ; preds = %L.entry
76+
%puts = call i32 @puts(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @str, i64 0, i64 0))
77+
ret i32 1
78+
79+
L.B0000: ; preds = %L.entry
80+
%2 = load i16, i16* %value.addr, align 2
81+
%3 = icmp eq i16 %2, 234
82+
br i1 %3, label %L.B0001, label %L.B0005
83+
84+
L.B0005: ; preds = %L.B0000
85+
%puts1 = call i32 @puts(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @str.1, i64 0, i64 0))
86+
ret i32 1
87+
88+
L.B0001: ; preds = %L.B0000
89+
%puts2 = call i32 @puts(i8* getelementptr inbounds ([7 x i8], [7 x i8]* @str.2, i64 0, i64 0))
90+
ret i32 0
91+
}
92+
93+
; Function Attrs: nounwind
94+
declare i32 @puts(i8* nocapture readonly) #0

0 commit comments

Comments
 (0)