Skip to content

Commit 8dacca9

Browse files
committed
[PowerPC] Sign extend sub-word values for atomic comparisons
Atomic comparison instructions use the sub-word load instructions (lbarx/lharx) on Power8 and up, but the loaded value is not sign-extended prior to the signed word compare instruction (cmpw). This patch adds that sign extension (extsb/extsh) for byte and halfword operands. llvm-svn: 282182
1 parent 29c59cc commit 8dacca9

File tree

2 files changed

+80
-2
lines changed

2 files changed

+80
-2
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8507,8 +8507,17 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
85078507
if (BinOpcode)
85088508
BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
85098509
if (CmpOpcode) {
8510-
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8511-
.addReg(incr).addReg(dest);
8510+
// Signed comparisons of byte or halfword values must be sign-extended.
8511+
if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
8512+
unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
8513+
BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
8514+
ExtReg).addReg(dest);
8515+
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8516+
.addReg(incr).addReg(ExtReg);
8517+
} else
8518+
BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
8519+
.addReg(incr).addReg(dest);
8520+
85128521
BuildMI(BB, dl, TII->get(PPC::BCC))
85138522
.addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
85148523
BB->addSuccessor(loop2MBB);

llvm/test/CodeGen/PowerPC/pr30451.ll

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: llc < %s -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown | FileCheck %s
2+
define i8 @atomic_min_i8() {
3+
top:
4+
%0 = alloca i8, align 2
5+
%1 = bitcast i8* %0 to i8*
6+
call void @llvm.lifetime.start(i64 2, i8* %1)
7+
store i8 -1, i8* %0, align 2
8+
%2 = atomicrmw min i8* %0, i8 0 acq_rel
9+
%3 = load atomic i8, i8* %0 acquire, align 8
10+
call void @llvm.lifetime.end(i64 2, i8* %1)
11+
ret i8 %3
12+
; CHECK-LABEL: atomic_min_i8
13+
; CHECK: lbarx [[DST:[0-9]+]],
14+
; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]]
15+
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
16+
; CHECK-NEXT: bge 0
17+
}
18+
define i16 @atomic_min_i16() {
19+
top:
20+
%0 = alloca i16, align 2
21+
%1 = bitcast i16* %0 to i8*
22+
call void @llvm.lifetime.start(i64 2, i8* %1)
23+
store i16 -1, i16* %0, align 2
24+
%2 = atomicrmw min i16* %0, i16 0 acq_rel
25+
%3 = load atomic i16, i16* %0 acquire, align 8
26+
call void @llvm.lifetime.end(i64 2, i8* %1)
27+
ret i16 %3
28+
; CHECK-LABEL: atomic_min_i16
29+
; CHECK: lharx [[DST:[0-9]+]],
30+
; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]]
31+
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
32+
; CHECK-NEXT: bge 0
33+
}
34+
35+
define i8 @atomic_max_i8() {
36+
top:
37+
%0 = alloca i8, align 2
38+
%1 = bitcast i8* %0 to i8*
39+
call void @llvm.lifetime.start(i64 2, i8* %1)
40+
store i8 -1, i8* %0, align 2
41+
%2 = atomicrmw max i8* %0, i8 0 acq_rel
42+
%3 = load atomic i8, i8* %0 acquire, align 8
43+
call void @llvm.lifetime.end(i64 2, i8* %1)
44+
ret i8 %3
45+
; CHECK-LABEL: atomic_max_i8
46+
; CHECK: lbarx [[DST:[0-9]+]],
47+
; CHECK-NEXT: extsb [[EXT:[0-9]+]], [[DST]]
48+
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
49+
; CHECK-NEXT: ble 0
50+
}
51+
define i16 @atomic_max_i16() {
52+
top:
53+
%0 = alloca i16, align 2
54+
%1 = bitcast i16* %0 to i8*
55+
call void @llvm.lifetime.start(i64 2, i8* %1)
56+
store i16 -1, i16* %0, align 2
57+
%2 = atomicrmw max i16* %0, i16 0 acq_rel
58+
%3 = load atomic i16, i16* %0 acquire, align 8
59+
call void @llvm.lifetime.end(i64 2, i8* %1)
60+
ret i16 %3
61+
; CHECK-LABEL: atomic_max_i16
62+
; CHECK: lharx [[DST:[0-9]+]],
63+
; CHECK-NEXT: extsh [[EXT:[0-9]+]], [[DST]]
64+
; CHECK-NEXT: cmpw {{[0-9]+}}, [[EXT]]
65+
; CHECK-NEXT: ble 0
66+
}
67+
68+
declare void @llvm.lifetime.start(i64, i8*)
69+
declare void @llvm.lifetime.end(i64, i8*)

0 commit comments

Comments
 (0)