Skip to content

Commit 8058a04

Browse files
committed
[X86] Support APX promoted RAO-INT and MOVBE instructions
1 parent 595d780 commit 8058a04

File tree

12 files changed

+697
-34
lines changed

12 files changed

+697
-34
lines changed

llvm/lib/Target/X86/X86InstrMisc.td

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1090,7 +1090,7 @@ def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
10901090
//===----------------------------------------------------------------------===//
10911091
// MOVBE Instructions
10921092
//
1093-
let Predicates = [HasMOVBE] in {
1093+
let Predicates = [HasMOVBE, NoEGPR] in {
10941094
let SchedRW = [WriteALULd] in {
10951095
def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
10961096
"movbe{w}\t{$src, $dst|$dst, $src}",
@@ -1121,6 +1121,58 @@ let Predicates = [HasMOVBE] in {
11211121
}
11221122
}
11231123

1124+
let Predicates = [HasMOVBE, HasEGPR, In64BitMode] in {
1125+
let SchedRW = [WriteALULd] in {
1126+
def MOVBE16rm_EVEX : I<0x60, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
1127+
"movbe{w}\t{$src, $dst|$dst, $src}",
1128+
[(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
1129+
EVEX, NoCD8, T_MAP4, PD;
1130+
def MOVBE32rm_EVEX : I<0x60, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
1131+
"movbe{l}\t{$src, $dst|$dst, $src}",
1132+
[(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
1133+
EVEX, NoCD8, T_MAP4;
1134+
def MOVBE64rm_EVEX : RI<0x60, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
1135+
"movbe{q}\t{$src, $dst|$dst, $src}",
1136+
[(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
1137+
EVEX, NoCD8, T_MAP4;
1138+
}
1139+
let SchedRW = [WriteStore] in {
1140+
def MOVBE16mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
1141+
"movbe{w}\t{$src, $dst|$dst, $src}",
1142+
[(store (bswap GR16:$src), addr:$dst)]>,
1143+
EVEX, NoCD8, T_MAP4, PD;
1144+
def MOVBE32mr_EVEX : I<0x61, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
1145+
"movbe{l}\t{$src, $dst|$dst, $src}",
1146+
[(store (bswap GR32:$src), addr:$dst)]>,
1147+
EVEX, NoCD8, T_MAP4;
1148+
def MOVBE64mr_EVEX : RI<0x61, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
1149+
"movbe{q}\t{$src, $dst|$dst, $src}",
1150+
[(store (bswap GR64:$src), addr:$dst)]>,
1151+
EVEX, NoCD8, T_MAP4;
1152+
}
1153+
let SchedRW = [WriteALU] in {
1154+
def MOVBE16rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR16:$dst, GR16:$src),
1155+
"movbe{w}\t{$src, $dst|$dst, $src}", []>,
1156+
EVEX, NoCD8, T_MAP4, PD;
1157+
def MOVBE32rr_EVEX : I<0x61, MRMDestReg, (outs), (ins GR32:$dst, GR32:$src),
1158+
"movbe{l}\t{$src, $dst|$dst, $src}", []>,
1159+
EVEX, NoCD8, T_MAP4;
1160+
def MOVBE64rr_EVEX : RI<0x61, MRMDestReg, (outs), (ins GR64:$dst, GR64:$src),
1161+
"movbe{q}\t{$src, $dst|$dst, $src}", []>,
1162+
EVEX, NoCD8, T_MAP4;
1163+
1164+
def MOVBE16rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
1165+
"movbe{w}\t{$src, $dst|$dst, $src}", []>,
1166+
EVEX, NoCD8, T_MAP4, PD;
1167+
def MOVBE32rr_EVEX_REV : I<0x60, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
1168+
"movbe{l}\t{$src, $dst|$dst, $src}", []>,
1169+
EVEX, NoCD8, T_MAP4;
1170+
def MOVBE64rr_EVEX_REV : RI<0x60, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
1171+
"movbe{q}\t{$src, $dst|$dst, $src}", []>,
1172+
EVEX, NoCD8, T_MAP4;
1173+
}
1174+
}
1175+
11241176
//===----------------------------------------------------------------------===//
11251177
// RDRAND Instruction
11261178
//

llvm/lib/Target/X86/X86InstrRAOINT.td

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,30 @@ def X86rao_xor : SDNode<"X86ISD::AXOR", SDTRAOBinaryArith,
2525
def X86rao_and : SDNode<"X86ISD::AAND", SDTRAOBinaryArith,
2626
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
2727

28-
multiclass RAOINT_BASE<string OpcodeStr> {
28+
multiclass RAOINT_BASE<string OpcodeStr, string Suffix = ""> {
2929
let Predicates = [HasRAOINT] in
30-
def 32mr : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
31-
!strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
32-
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
33-
Sched<[WriteALURMW]>;
30+
def 32mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
31+
!strconcat("a", OpcodeStr, "{l}\t{$src, $dst|$dst, $src}"),
32+
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR32:$src)]>,
33+
Sched<[WriteALURMW]>;
3434

3535
let Predicates = [HasRAOINT, In64BitMode] in
36-
def 64mr : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
37-
!strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
38-
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
39-
Sched<[WriteALURMW]>, REX_W;
36+
def 64mr#Suffix : I<0xfc, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
37+
!strconcat("a", OpcodeStr, "{q}\t{$src, $dst|$dst, $src}"),
38+
[(!cast<SDNode>("X86rao_" # OpcodeStr) addr:$dst, GR64:$src)]>,
39+
Sched<[WriteALURMW]>, REX_W;
4040
}
4141

42+
let Predicates = [HasRAOINT, NoEGPR] in {
4243
defm AADD : RAOINT_BASE<"add">, T8;
4344
defm AAND : RAOINT_BASE<"and">, T8, PD;
4445
defm AOR : RAOINT_BASE<"or" >, T8, XD;
4546
defm AXOR : RAOINT_BASE<"xor">, T8, XS;
47+
}
48+
49+
let Predicates = [HasRAOINT, HasEGPR, In64BitMode] in {
50+
defm AADD : RAOINT_BASE<"add", "_EVEX">, EVEX, NoCD8, T_MAP4;
51+
defm AAND : RAOINT_BASE<"and", "_EVEX">, EVEX, NoCD8, T_MAP4, PD;
52+
defm AOR : RAOINT_BASE<"or", "_EVEX">, EVEX, NoCD8, T_MAP4, XD;
53+
defm AXOR : RAOINT_BASE<"xor", "_EVEX">, EVEX, NoCD8, T_MAP4, XS;
54+
}

llvm/test/CodeGen/X86/movbe.ll

Lines changed: 122 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,164 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
12
; RUN: llc -mtriple=x86_64-linux -mcpu=atom < %s | FileCheck %s
23
; RUN: llc -mtriple=x86_64-linux -mcpu=slm < %s | FileCheck %s -check-prefix=SLM
4+
; RUN: llc -mtriple=x86_64-linux -mcpu=slm -mattr=+egpr --show-mc-encoding < %s | FileCheck %s -check-prefix=EGPR
35

46
declare i16 @llvm.bswap.i16(i16) nounwind readnone
57
declare i32 @llvm.bswap.i32(i32) nounwind readnone
68
declare i64 @llvm.bswap.i64(i64) nounwind readnone
79

810
define void @test1(ptr nocapture %x, i16 %y) nounwind {
11+
; CHECK-LABEL: test1:
12+
; CHECK: # %bb.0:
13+
; CHECK-NEXT: movbew %si, (%rdi)
14+
; CHECK-NEXT: nop
15+
; CHECK-NEXT: nop
16+
; CHECK-NEXT: nop
17+
; CHECK-NEXT: nop
18+
; CHECK-NEXT: nop
19+
; CHECK-NEXT: nop
20+
; CHECK-NEXT: retq
21+
;
22+
; SLM-LABEL: test1:
23+
; SLM: # %bb.0:
24+
; SLM-NEXT: movbew %si, (%rdi)
25+
; SLM-NEXT: retq
26+
;
27+
; EGPR-LABEL: test1:
28+
; EGPR: # %bb.0:
29+
; EGPR-NEXT: movbew %si, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf1,0x37]
30+
; EGPR-NEXT: retq # encoding: [0xc3]
931
%bswap = call i16 @llvm.bswap.i16(i16 %y)
1032
store i16 %bswap, ptr %x, align 2
1133
ret void
12-
; CHECK-LABEL: test1:
13-
; CHECK: movbew %si, (%rdi)
14-
; SLM-LABEL: test1:
15-
; SLM: movbew %si, (%rdi)
1634
}
1735

1836
define i16 @test2(ptr %x) nounwind {
37+
; CHECK-LABEL: test2:
38+
; CHECK: # %bb.0:
39+
; CHECK-NEXT: movbew (%rdi), %ax
40+
; CHECK-NEXT: nop
41+
; CHECK-NEXT: nop
42+
; CHECK-NEXT: nop
43+
; CHECK-NEXT: nop
44+
; CHECK-NEXT: nop
45+
; CHECK-NEXT: nop
46+
; CHECK-NEXT: retq
47+
;
48+
; SLM-LABEL: test2:
49+
; SLM: # %bb.0:
50+
; SLM-NEXT: movbew (%rdi), %ax
51+
; SLM-NEXT: retq
52+
;
53+
; EGPR-LABEL: test2:
54+
; EGPR: # %bb.0:
55+
; EGPR-NEXT: movbew (%rdi), %ax # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xf0,0x07]
56+
; EGPR-NEXT: retq # encoding: [0xc3]
1957
%load = load i16, ptr %x, align 2
2058
%bswap = call i16 @llvm.bswap.i16(i16 %load)
2159
ret i16 %bswap
22-
; CHECK-LABEL: test2:
23-
; CHECK: movbew (%rdi), %ax
24-
; SLM-LABEL: test2:
25-
; SLM: movbew (%rdi), %ax
2660
}
2761

2862
define void @test3(ptr nocapture %x, i32 %y) nounwind {
63+
; CHECK-LABEL: test3:
64+
; CHECK: # %bb.0:
65+
; CHECK-NEXT: movbel %esi, (%rdi)
66+
; CHECK-NEXT: nop
67+
; CHECK-NEXT: nop
68+
; CHECK-NEXT: nop
69+
; CHECK-NEXT: nop
70+
; CHECK-NEXT: nop
71+
; CHECK-NEXT: nop
72+
; CHECK-NEXT: retq
73+
;
74+
; SLM-LABEL: test3:
75+
; SLM: # %bb.0:
76+
; SLM-NEXT: movbel %esi, (%rdi)
77+
; SLM-NEXT: retq
78+
;
79+
; EGPR-LABEL: test3:
80+
; EGPR: # %bb.0:
81+
; EGPR-NEXT: movbel %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf1,0x37]
82+
; EGPR-NEXT: retq # encoding: [0xc3]
2983
%bswap = call i32 @llvm.bswap.i32(i32 %y)
3084
store i32 %bswap, ptr %x, align 4
3185
ret void
32-
; CHECK-LABEL: test3:
33-
; CHECK: movbel %esi, (%rdi)
34-
; SLM-LABEL: test3:
35-
; SLM: movbel %esi, (%rdi)
3686
}
3787

3888
define i32 @test4(ptr %x) nounwind {
89+
; CHECK-LABEL: test4:
90+
; CHECK: # %bb.0:
91+
; CHECK-NEXT: movbel (%rdi), %eax
92+
; CHECK-NEXT: nop
93+
; CHECK-NEXT: nop
94+
; CHECK-NEXT: nop
95+
; CHECK-NEXT: nop
96+
; CHECK-NEXT: nop
97+
; CHECK-NEXT: nop
98+
; CHECK-NEXT: retq
99+
;
100+
; SLM-LABEL: test4:
101+
; SLM: # %bb.0:
102+
; SLM-NEXT: movbel (%rdi), %eax
103+
; SLM-NEXT: retq
104+
;
105+
; EGPR-LABEL: test4:
106+
; EGPR: # %bb.0:
107+
; EGPR-NEXT: movbel (%rdi), %eax # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xf0,0x07]
108+
; EGPR-NEXT: retq # encoding: [0xc3]
39109
%load = load i32, ptr %x, align 4
40110
%bswap = call i32 @llvm.bswap.i32(i32 %load)
41111
ret i32 %bswap
42-
; CHECK-LABEL: test4:
43-
; CHECK: movbel (%rdi), %eax
44-
; SLM-LABEL: test4:
45-
; SLM: movbel (%rdi), %eax
46112
}
47113

48114
define void @test5(ptr %x, i64 %y) nounwind {
115+
; CHECK-LABEL: test5:
116+
; CHECK: # %bb.0:
117+
; CHECK-NEXT: movbeq %rsi, (%rdi)
118+
; CHECK-NEXT: nop
119+
; CHECK-NEXT: nop
120+
; CHECK-NEXT: nop
121+
; CHECK-NEXT: nop
122+
; CHECK-NEXT: nop
123+
; CHECK-NEXT: nop
124+
; CHECK-NEXT: retq
125+
;
126+
; SLM-LABEL: test5:
127+
; SLM: # %bb.0:
128+
; SLM-NEXT: movbeq %rsi, (%rdi)
129+
; SLM-NEXT: retq
130+
;
131+
; EGPR-LABEL: test5:
132+
; EGPR: # %bb.0:
133+
; EGPR-NEXT: movbeq %rsi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf1,0x37]
134+
; EGPR-NEXT: retq # encoding: [0xc3]
49135
%bswap = call i64 @llvm.bswap.i64(i64 %y)
50136
store i64 %bswap, ptr %x, align 8
51137
ret void
52-
; CHECK-LABEL: test5:
53-
; CHECK: movbeq %rsi, (%rdi)
54-
; SLM-LABEL: test5:
55-
; SLM: movbeq %rsi, (%rdi)
56138
}
57139

58140
define i64 @test6(ptr %x) nounwind {
141+
; CHECK-LABEL: test6:
142+
; CHECK: # %bb.0:
143+
; CHECK-NEXT: movbeq (%rdi), %rax
144+
; CHECK-NEXT: nop
145+
; CHECK-NEXT: nop
146+
; CHECK-NEXT: nop
147+
; CHECK-NEXT: nop
148+
; CHECK-NEXT: nop
149+
; CHECK-NEXT: nop
150+
; CHECK-NEXT: retq
151+
;
152+
; SLM-LABEL: test6:
153+
; SLM: # %bb.0:
154+
; SLM-NEXT: movbeq (%rdi), %rax
155+
; SLM-NEXT: retq
156+
;
157+
; EGPR-LABEL: test6:
158+
; EGPR: # %bb.0:
159+
; EGPR-NEXT: movbeq (%rdi), %rax # EVEX TO LEGACY Compression encoding: [0x48,0x0f,0x38,0xf0,0x07]
160+
; EGPR-NEXT: retq # encoding: [0xc3]
59161
%load = load i64, ptr %x, align 8
60162
%bswap = call i64 @llvm.bswap.i64(i64 %load)
61163
ret i64 %bswap
62-
; CHECK-LABEL: test6:
63-
; CHECK: movbeq (%rdi), %rax
64-
; SLM-LABEL: test6:
65-
; SLM: movbeq (%rdi), %rax
66164
}

llvm/test/CodeGen/X86/raoint-intrinsics-32.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint | FileCheck %s --check-prefixes=X64
33
; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+raoint | FileCheck %s --check-prefixes=X86
4+
; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+raoint,+egpr | FileCheck %s --check-prefixes=EGPR
45

56
define void @test_int_x86_aadd32(ptr %A, i32 %B) {
67
; X64-LABEL: test_int_x86_aadd32:
@@ -14,6 +15,11 @@ define void @test_int_x86_aadd32(ptr %A, i32 %B) {
1415
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
1516
; X86-NEXT: aaddl %eax, (%ecx) # encoding: [0x0f,0x38,0xfc,0x01]
1617
; X86-NEXT: retl # encoding: [0xc3]
18+
;
19+
; EGPR-LABEL: test_int_x86_aadd32:
20+
; EGPR: # %bb.0:
21+
; EGPR-NEXT: aaddl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x0f,0x38,0xfc,0x37]
22+
; EGPR-NEXT: retq # encoding: [0xc3]
1723
call void @llvm.x86.aadd32(ptr %A, i32 %B)
1824
ret void
1925
}
@@ -31,6 +37,11 @@ define void @test_int_x86_aand32(ptr %A, i32 %B) {
3137
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
3238
; X86-NEXT: aandl %eax, (%ecx) # encoding: [0x66,0x0f,0x38,0xfc,0x01]
3339
; X86-NEXT: retl # encoding: [0xc3]
40+
;
41+
; EGPR-LABEL: test_int_x86_aand32:
42+
; EGPR: # %bb.0:
43+
; EGPR-NEXT: aandl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0x66,0x0f,0x38,0xfc,0x37]
44+
; EGPR-NEXT: retq # encoding: [0xc3]
3445
call void @llvm.x86.aand32(ptr %A, i32 %B)
3546
ret void
3647
}
@@ -48,6 +59,11 @@ define void @test_int_x86_aor32(ptr %A, i32 %B) {
4859
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
4960
; X86-NEXT: aorl %eax, (%ecx) # encoding: [0xf2,0x0f,0x38,0xfc,0x01]
5061
; X86-NEXT: retl # encoding: [0xc3]
62+
;
63+
; EGPR-LABEL: test_int_x86_aor32:
64+
; EGPR: # %bb.0:
65+
; EGPR-NEXT: aorl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf2,0x0f,0x38,0xfc,0x37]
66+
; EGPR-NEXT: retq # encoding: [0xc3]
5167
call void @llvm.x86.aor32(ptr %A, i32 %B)
5268
ret void
5369
}
@@ -65,6 +81,11 @@ define void @test_int_x86_axor32(ptr %A, i32 %B) {
6581
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
6682
; X86-NEXT: axorl %eax, (%ecx) # encoding: [0xf3,0x0f,0x38,0xfc,0x01]
6783
; X86-NEXT: retl # encoding: [0xc3]
84+
;
85+
; EGPR-LABEL: test_int_x86_axor32:
86+
; EGPR: # %bb.0:
87+
; EGPR-NEXT: axorl %esi, (%rdi) # EVEX TO LEGACY Compression encoding: [0xf3,0x0f,0x38,0xfc,0x37]
88+
; EGPR-NEXT: retq # encoding: [0xc3]
6889
call void @llvm.x86.axor32(ptr %A, i32 %B)
6990
ret void
7091
}

0 commit comments

Comments
 (0)