Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 7dfc343

Browse files
committed
[X86] Don't zero/sign-extend i1, i8, or i16 return values to 32 bits (PR22532)
This matches GCC and MSVC's behaviour, and saves on code size. We were already not extending i1 return values on x86_64 after r127766. This takes that patch further by applying it to the x86 target as well, and also for i8 and i16. The ABI docs have been unclear about the required behaviour here. The new i386 psABI [1] clearly states (Table 2.4, page 14) that i1, i8, and i16 return values do not need to be extended beyond 8 bits. The x86_64 ABI doc is being updated to say the same [2]. Differential Revision: http://reviews.llvm.org/D16907 [1]. https://01.org/sites/default/files/file_attach/intel386-psabi-1.0.pdf [2]. https://groups.google.com/d/msg/x86-64-abi/E8O33onbnGQ/_RFWw_ixDQAJ git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@260133 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent bff2e82 commit 7dfc343

19 files changed

+207
-66
lines changed

docs/LangRef.rst

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -907,8 +907,7 @@ Currently, only the following parameter attributes are defined:
907907
``zeroext``
908908
This indicates to the code generator that the parameter or return
909909
value should be zero-extended to the extent required by the target's
910-
ABI (which is usually 32-bits, but is 8-bits for a i1 on x86-64) by
911-
the caller (for a parameter) or the callee (for a return value).
910+
ABI by the caller (for a parameter) or the callee (for a return value).
912911
``signext``
913912
This indicates to the code generator that the parameter or return
914913
value should be sign-extended to the extent required by the target's

include/llvm/Target/TargetLowering.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2537,12 +2537,12 @@ class TargetLowering : public TargetLoweringBase {
25372537
}
25382538

25392539
/// Return the type that should be used to zero or sign extend a
2540-
/// zeroext/signext integer argument or return value. FIXME: Most C calling
2541-
/// convention requires the return type to be promoted, but this is not true
2542-
/// all the time, e.g. i1 on x86-64. It is also not necessary for non-C
2543-
/// calling conventions. The frontend should handle this and include all of
2544-
/// the necessary information.
2545-
virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2540+
/// zeroext/signext integer return value. FIXME: Some C calling conventions
2541+
/// require the return type to be promoted, but this is not true all the time,
2542+
/// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
2543+
/// conventions. The frontend should handle this and include all of the
2544+
/// necessary information.
2545+
virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
25462546
ISD::NodeType /*ExtendKind*/) const {
25472547
EVT MinVT = getRegisterType(Context, MVT::i32);
25482548
return VT.bitsLT(MinVT) ? MinVT : VT;

lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
13811381
EVT VT = ValueVTs[j];
13821382

13831383
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1384-
VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind);
1384+
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
13851385

13861386
unsigned NumParts = TLI.getNumRegisters(Context, VT);
13871387
MVT PartVT = TLI.getRegisterType(Context, VT);

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2374,15 +2374,14 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
23742374
return true;
23752375
}
23762376

2377-
EVT
2378-
X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
2379-
ISD::NodeType ExtendKind) const {
2380-
MVT ReturnMVT;
2381-
// TODO: Is this also valid on 32-bit?
2382-
if (Subtarget.is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
2377+
EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2378+
ISD::NodeType ExtendKind) const {
2379+
MVT ReturnMVT = MVT::i32;
2380+
2381+
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) {
2382+
// The ABI does not require i1, i8 or i16 to be extended.
23832383
ReturnMVT = MVT::i8;
2384-
else
2385-
ReturnMVT = MVT::i32;
2384+
}
23862385

23872386
EVT MinVT = getRegisterType(Context, ReturnMVT);
23882387
return VT.bitsLT(MinVT) ? MinVT : VT;

lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1092,8 +1092,8 @@ namespace llvm {
10921092

10931093
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
10941094

1095-
EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
1096-
ISD::NodeType ExtendKind) const override;
1095+
EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1096+
ISD::NodeType ExtendKind) const override;
10971097

10981098
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
10991099
bool isVarArg,

test/CodeGen/X86/2007-08-10-SignExtSubreg.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22

33
@X = global i32 0 ; <i32*> [#uses=1]
44

5-
define signext i8 @_Z3fooi(i32 %x) {
5+
define i32 @_Z3fooi(i32 %x) {
66
entry:
77
store i32 %x, i32* @X, align 4
88
%retval67 = trunc i32 %x to i8 ; <i8> [#uses=1]
9-
ret i8 %retval67
9+
%retval = sext i8 %retval67 to i32
10+
ret i32 %retval
1011
}

test/CodeGen/X86/3addr-16bit.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ entry:
1212

1313
; 64BIT-LABEL: t1:
1414
; 64BIT-NOT: movw %si, %ax
15-
; 64BIT: leal 1(%rsi), %eax
15+
; 64BIT: leal 1(%rsi), %ebx
1616
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
1717
%1 = add i16 %k, 1 ; <i16> [#uses=3]
1818
br i1 %0, label %bb, label %bb1
@@ -34,8 +34,8 @@ entry:
3434

3535
; 64BIT-LABEL: t2:
3636
; 64BIT-NOT: movw %si, %ax
37-
; 64BIT: leal -1(%rsi), %eax
38-
; 64BIT: movzwl %ax
37+
; 64BIT: leal -1(%rsi), %ebx
38+
; 64BIT: movzwl %bx
3939
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
4040
%1 = add i16 %k, -1 ; <i16> [#uses=3]
4141
br i1 %0, label %bb, label %bb1
@@ -59,7 +59,7 @@ entry:
5959

6060
; 64BIT-LABEL: t3:
6161
; 64BIT-NOT: movw %si, %ax
62-
; 64BIT: leal 2(%rsi), %eax
62+
; 64BIT: leal 2(%rsi), %ebx
6363
%0 = add i16 %k, 2 ; <i16> [#uses=3]
6464
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
6565
br i1 %1, label %bb, label %bb1
@@ -82,7 +82,7 @@ entry:
8282

8383
; 64BIT-LABEL: t4:
8484
; 64BIT-NOT: movw %si, %ax
85-
; 64BIT: leal (%rsi,%rdi), %eax
85+
; 64BIT: leal (%rsi,%rdi), %ebx
8686
%0 = add i16 %k, %c ; <i16> [#uses=3]
8787
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
8888
br i1 %1, label %bb, label %bb1

test/CodeGen/X86/bool-zext.ll

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
1+
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s -check-prefix=X86
12
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s -check-prefix=X64
23
; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
34

4-
; X64: @bar1
5+
; Check that the argument gets zero-extended before calling.
6+
; X86-LABEL: bar1
7+
; X86: movzbl
8+
; X86: calll
9+
; X64-LABEL: bar1
510
; X64: movzbl
611
; X64: jmp
7-
; WIN64: @bar1
12+
; WIN64-LABEL: bar1
813
; WIN64: movzbl
914
; WIN64: callq
1015
define void @bar1(i1 zeroext %v1) nounwind ssp {
@@ -14,10 +19,11 @@ entry:
1419
ret void
1520
}
1621

17-
; X64: @bar2
22+
; Check that on x86-64 the arguments are simply forwarded.
23+
; X64-LABEL: bar2
1824
; X64-NOT: movzbl
1925
; X64: jmp
20-
; WIN64: @bar2
26+
; WIN64-LABEL: bar2
2127
; WIN64-NOT: movzbl
2228
; WIN64: callq
2329
define void @bar2(i8 zeroext %v1) nounwind ssp {
@@ -27,16 +33,19 @@ entry:
2733
ret void
2834
}
2935

30-
; X64: @bar3
31-
; X64: callq
32-
; X64-NOT: movzbl
33-
; X64-NOT: and
34-
; X64: ret
35-
; WIN64: @bar3
36-
; WIN64: callq
37-
; WIN64-NOT: movzbl
38-
; WIN64-NOT: and
39-
; WIN64: ret
36+
; Check that i1 return values are not zero-extended.
37+
; X86-LABEL: bar3
38+
; X86: call
39+
; X86-NEXT: {{add|pop}}
40+
; X86-NEXT: ret
41+
; X64-LABEL: bar3
42+
; X64: call
43+
; X64-NEXT: {{add|pop}}
44+
; X64-NEXT: ret
45+
; WIN64-LABEL: bar3
46+
; WIN64: call
47+
; WIN64-NEXT: {{add|pop}}
48+
; WIN64-NEXT: ret
4049
define zeroext i1 @bar3() nounwind ssp {
4150
entry:
4251
%call = call i1 @foo2() nounwind

test/CodeGen/X86/divrem8_ext.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ target triple = "x86_64-apple-macosx10.10.0"
66
define zeroext i8 @test_udivrem_zext_ah(i8 %x, i8 %y) {
77
; CHECK-LABEL: test_udivrem_zext_ah
88
; CHECK: divb
9-
; CHECK: movzbl %ah, [[REG_REM:%[a-z0-9]+]]
9+
; CHECK: movzbl %ah, %e[[REG_REM:[a-z]]]x
1010
; CHECK: movb %al, ([[REG_ZPTR:%[a-z0-9]+]])
11-
; CHECK: movl [[REG_REM]], %eax
11+
; CHECK: movb %[[REG_REM]]l, %al
1212
; CHECK: ret
1313
%div = udiv i8 %x, %y
1414
store i8 %div, i8* @z
@@ -51,9 +51,9 @@ define signext i8 @test_sdivrem_sext_ah(i8 %x, i8 %y) {
5151
; CHECK-LABEL: test_sdivrem_sext_ah
5252
; CHECK: cbtw
5353
; CHECK: idivb
54-
; CHECK: movsbl %ah, [[REG_REM:%[a-z0-9]+]]
54+
; CHECK: movsbl %ah, %e[[REG_REM:[a-z]]]x
5555
; CHECK: movb %al, ([[REG_ZPTR]])
56-
; CHECK: movl [[REG_REM]], %eax
56+
; CHECK: movb %[[REG_REM]]l, %al
5757
; CHECK: ret
5858
%div = sdiv i8 %x, %y
5959
store i8 %div, i8* @z

test/CodeGen/X86/float-conv-elim.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ define i32 @foo2(i8 %a) #0 {
2121

2222
; CHECK-LABEL: bar
2323
; CHECK-NOT: cvt
24-
; CHECK: movl
24+
; CHECK: movb
2525
define zeroext i8 @bar(i8 zeroext %a) #0 {
2626
%conv = uitofp i8 %a to float
2727
%conv1 = fptoui float %conv to i8

test/CodeGen/X86/h-registers-3.ll

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,31 @@
1-
; RUN: llc < %s -march=x86 | grep mov | count 1
2-
; RUN: llc < %s -march=x86-64 | grep mov | count 1
3-
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | grep mov | count 1
1+
; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X86
2+
; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
3+
; RUN: llc < %s -mtriple=x86_64-linux-gnux32 | FileCheck %s -check-prefix=X32
44

55
define zeroext i8 @foo() nounwind ssp {
66
entry:
77
%0 = tail call zeroext i16 (...) @bar() nounwind
88
%1 = lshr i16 %0, 8
99
%2 = trunc i16 %1 to i8
1010
ret i8 %2
11+
12+
; X86-LABEL: foo
13+
; X86: calll
14+
; X86-NEXT: movb %ah, %al
15+
; X86-NEXT: addl $12, %esp
16+
; X86-NEXT: retl
17+
18+
; X64-LABEL: foo
19+
; X64: callq
20+
; X64-NEXT: shrl $8, %eax
21+
; X64-NEXT: popq
22+
; X64-NEXT: retq
23+
24+
; X32-LABEL: foo
25+
; X32: callq
26+
; X32-NEXT: shrl $8, %eax
27+
; X32-NEXT: popq
28+
; X32-NEXT: retq
1129
}
1230

1331
declare zeroext i16 @bar(...)

test/CodeGen/X86/promote-i16.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,19 @@
33
define signext i16 @foo(i16 signext %x) nounwind {
44
entry:
55
; CHECK-LABEL: foo:
6-
; CHECK-NOT: movzwl
7-
; CHECK: movswl 4(%esp), %eax
8-
; CHECK: xorl $21998, %eax
6+
; CHECK: movzwl 4(%esp), %eax
7+
; CHECK-NEXT: xorl $21998, %eax
8+
; CHECK-NEXT: retl
99
%0 = xor i16 %x, 21998
1010
ret i16 %0
1111
}
1212

1313
define signext i16 @bar(i16 signext %x) nounwind {
1414
entry:
1515
; CHECK-LABEL: bar:
16-
; CHECK-NOT: movzwl
17-
; CHECK: movswl 4(%esp), %eax
18-
; CHECK: xorl $-10770, %eax
16+
; CHECK: movzwl 4(%esp), %eax
17+
; CHECK-NEXT: xorl $54766, %eax
18+
; CHECK-NEXT: retl
1919
%0 = xor i16 %x, 54766
2020
ret i16 %0
2121
}

test/CodeGen/X86/return-ext.ll

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu | FileCheck %s
2+
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
3+
4+
5+
@x = common global i32 0, align 4
6+
7+
define zeroext i1 @unsigned_i1() {
8+
entry:
9+
%0 = load i32, i32* @x
10+
%cmp = icmp eq i32 %0, 42
11+
ret i1 %cmp
12+
13+
; Unsigned i1 return values are not extended.
14+
; CHECK-LABEL: unsigned_i1:
15+
; CHECK: cmp
16+
; CHECK-NEXT: sete
17+
; CHECK-NEXT: ret
18+
}
19+
20+
define zeroext i8 @unsigned_i8() {
21+
entry:
22+
%0 = load i32, i32* @x
23+
%cmp = icmp eq i32 %0, 42
24+
%retval = zext i1 %cmp to i8
25+
ret i8 %retval
26+
27+
; Unsigned i8 return values are not extended.
28+
; CHECK-LABEL: unsigned_i8:
29+
; CHECK: cmp
30+
; CHECK-NEXT: sete
31+
; CHECK-NEXT: ret
32+
}
33+
34+
define signext i8 @signed_i8() {
35+
entry:
36+
%0 = load i32, i32* @x
37+
%cmp = icmp eq i32 %0, 42
38+
%retval = zext i1 %cmp to i8
39+
ret i8 %retval
40+
41+
; Signed i8 return values are not extended.
42+
; CHECK-LABEL: signed_i8:
43+
; CHECK: cmp
44+
; CHECK-NEXT: sete
45+
; CHECK-NEXT: ret
46+
}
47+
48+
@a = common global i16 0
49+
@b = common global i16 0
50+
define zeroext i16 @unsigned_i16() {
51+
entry:
52+
%0 = load i16, i16* @a
53+
%1 = load i16, i16* @b
54+
%add = add i16 %1, %0
55+
ret i16 %add
56+
57+
; i16 return values are not extended.
58+
; CHECK-LABEL: unsigned_i16:
59+
; CHECK: movw
60+
; CHECK-NEXT: addw
61+
; CHECK-NEXT: ret
62+
}
63+
64+
65+
define i32 @use_i1() {
66+
entry:
67+
%0 = call i1 @unsigned_i1();
68+
%1 = zext i1 %0 to i32
69+
ret i32 %1
70+
71+
; The high 24 bits of %eax from a function returning i1 are undefined.
72+
; CHECK-LABEL: use_i1:
73+
; CHECK: call
74+
; CHECK-NEXT: movzbl
75+
; CHECK-NEXT: {{pop|add}}
76+
; CHECK-NEXT: ret
77+
}
78+
79+
define i32 @use_i8() {
80+
entry:
81+
%0 = call i8 @unsigned_i8();
82+
%1 = zext i8 %0 to i32
83+
ret i32 %1
84+
85+
; The high 24 bits of %eax from a function returning i8 are undefined.
86+
; CHECK-LABEL: use_i8:
87+
; CHECK: call
88+
; CHECK-NEXT: movzbl
89+
; CHECK-NEXT: {{pop|add}}
90+
; CHECK-NEXT: ret
91+
}
92+
93+
define i32 @use_i16() {
94+
entry:
95+
%0 = call i16 @unsigned_i16();
96+
%1 = zext i16 %0 to i32
97+
ret i32 %1
98+
99+
; The high 16 bits of %eax from a function returning i16 are undefined.
100+
; CHECK-LABEL: use_i16:
101+
; CHECK: call
102+
; CHECK-NEXT: movzwl
103+
; CHECK-NEXT: {{pop|add}}
104+
; CHECK-NEXT: ret
105+
}

test/CodeGen/X86/select.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ entry:
6666
%2 = load i8, i8* %1, align 1 ; <i8> [#uses=1]
6767
ret i8 %2
6868
; CHECK-LABEL: test4:
69-
; CHECK: movsbl ({{.*}},4), %eax
69+
; CHECK: movb ({{.*}},4), %al
7070

7171
; ATOM-LABEL: test4:
72-
; ATOM: movsbl ({{.*}},4), %eax
72+
; ATOM: movb ({{.*}},4), %al
7373
}
7474

7575
define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {

0 commit comments

Comments
 (0)