Skip to content

Commit db9de22

Browse files
committed
Teach the AArch64 backend patterns to generate the EOR3 instruction.
Adds patterns to match the EOR3 instruction.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D108793
1 parent daf0dfb commit db9de22

File tree

2 files changed

+158
-0
lines changed

2 files changed

+158
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -979,6 +979,15 @@ def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
979979
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
980980
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
981981

982+
// Selection pattern for a three-way vector exclusive-or.
// Matches the nested DAG (xor (xor Vn, Vm), Va), where all three operands are
// V128 register operands of type VecTy, and selects a single EOR3 with the
// operands in the order Vn, Vm, Va.
// NOTE(review): no feature predicate appears on this class; presumably an
// enclosing `let Predicates = ...` block (not visible in this hunk) restricts
// it to SHA3-capable targets -- confirm. The accompanying eor3.ll test expects
// plain `eor` pairs when run with -mattr=-sha3.
class EOR3_pattern<ValueType VecTy>
983+
: Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
984+
(EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
985+
986+
// Instantiate the EOR3 xor-of-xor pattern once per integer vector type:
// v16i8, v8i16, v4i32 and v2i64.
def : EOR3_pattern<v16i8>;
987+
def : EOR3_pattern<v8i16>;
988+
def : EOR3_pattern<v4i32>;
989+
def : EOR3_pattern<v2i64>;
990+
982991
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
983992
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
984993
def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;

llvm/test/CodeGen/AArch64/eor3.ll

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
2+
; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
3+
; RUN: llc -mtriple=aarch64-none-eabi -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
4+
5+
; <16 x i8> case where the inner xor (%0 ^ %1) is the second operand of the
; outer xor (%2 ^ inner). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
6+
; SHA3-LABEL: eor3_16x8_left:
7+
; SHA3: // %bb.0:
8+
; SHA3-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
9+
; SHA3-NEXT: ret
10+
;
11+
; NOSHA3-LABEL: eor3_16x8_left:
12+
; NOSHA3: // %bb.0:
13+
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
14+
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
15+
; NOSHA3-NEXT: ret
16+
%4 = xor <16 x i8> %0, %1
17+
%5 = xor <16 x i8> %2, %4
18+
ret <16 x i8> %5
19+
}
20+
21+
; <16 x i8> case where the inner xor (%1 ^ %2) is the first operand of the
; outer xor (inner ^ %0). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
22+
; SHA3-LABEL: eor3_16x8_right:
23+
; SHA3: // %bb.0:
24+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
25+
; SHA3-NEXT: ret
26+
;
27+
; NOSHA3-LABEL: eor3_16x8_right:
28+
; NOSHA3: // %bb.0:
29+
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
30+
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
31+
; NOSHA3-NEXT: ret
32+
%4 = xor <16 x i8> %1, %2
33+
%5 = xor <16 x i8> %4, %0
34+
ret <16 x i8> %5
35+
}
36+
37+
; <8 x i16> case where the inner xor (%0 ^ %1) is the second operand of the
; outer xor (%2 ^ inner). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
38+
; SHA3-LABEL: eor3_8x16_left:
39+
; SHA3: // %bb.0:
40+
; SHA3-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
41+
; SHA3-NEXT: ret
42+
;
43+
; NOSHA3-LABEL: eor3_8x16_left:
44+
; NOSHA3: // %bb.0:
45+
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
46+
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
47+
; NOSHA3-NEXT: ret
48+
%4 = xor <8 x i16> %0, %1
49+
%5 = xor <8 x i16> %2, %4
50+
ret <8 x i16> %5
51+
}
52+
53+
; <8 x i16> case where the inner xor (%1 ^ %2) is the first operand of the
; outer xor (inner ^ %0). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
54+
; SHA3-LABEL: eor3_8x16_right:
55+
; SHA3: // %bb.0:
56+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
57+
; SHA3-NEXT: ret
58+
;
59+
; NOSHA3-LABEL: eor3_8x16_right:
60+
; NOSHA3: // %bb.0:
61+
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
62+
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
63+
; NOSHA3-NEXT: ret
64+
%4 = xor <8 x i16> %1, %2
65+
%5 = xor <8 x i16> %4, %0
66+
ret <8 x i16> %5
67+
}
68+
69+
; <4 x i32> case where the inner xor (%0 ^ %1) is the second operand of the
; outer xor (%2 ^ inner). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
70+
; SHA3-LABEL: eor3_4x32_left:
71+
; SHA3: // %bb.0:
72+
; SHA3-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
73+
; SHA3-NEXT: ret
74+
;
75+
; NOSHA3-LABEL: eor3_4x32_left:
76+
; NOSHA3: // %bb.0:
77+
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
78+
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
79+
; NOSHA3-NEXT: ret
80+
%4 = xor <4 x i32> %0, %1
81+
%5 = xor <4 x i32> %2, %4
82+
ret <4 x i32> %5
83+
}
84+
85+
; <4 x i32> case where the inner xor (%1 ^ %2) is the first operand of the
; outer xor (inner ^ %0). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
86+
; SHA3-LABEL: eor3_4x32_right:
87+
; SHA3: // %bb.0:
88+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
89+
; SHA3-NEXT: ret
90+
;
91+
; NOSHA3-LABEL: eor3_4x32_right:
92+
; NOSHA3: // %bb.0:
93+
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
94+
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
95+
; NOSHA3-NEXT: ret
96+
%4 = xor <4 x i32> %1, %2
97+
%5 = xor <4 x i32> %4, %0
98+
ret <4 x i32> %5
99+
}
100+
101+
; <2 x i64> case where the inner xor (%0 ^ %1) is the second operand of the
; outer xor (%2 ^ inner). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <2 x i64> @eor3_2x64_left(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
102+
; SHA3-LABEL: eor3_2x64_left:
103+
; SHA3: // %bb.0:
104+
; SHA3-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
105+
; SHA3-NEXT: ret
106+
;
107+
; NOSHA3-LABEL: eor3_2x64_left:
108+
; NOSHA3: // %bb.0:
109+
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
110+
; NOSHA3-NEXT: eor v0.16b, v2.16b, v0.16b
111+
; NOSHA3-NEXT: ret
112+
%4 = xor <2 x i64> %0, %1
113+
%5 = xor <2 x i64> %2, %4
114+
ret <2 x i64> %5
115+
}
116+
117+
; <2 x i64> case where the inner xor (%1 ^ %2) is the first operand of the
; outer xor (inner ^ %0). With +sha3 the expected output is one eor3; with
; -sha3 the expected output is two eor instructions.
define <2 x i64> @eor3_2x64_right(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
118+
; SHA3-LABEL: eor3_2x64_right:
119+
; SHA3: // %bb.0:
120+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
121+
; SHA3-NEXT: ret
122+
;
123+
; NOSHA3-LABEL: eor3_2x64_right:
124+
; NOSHA3: // %bb.0:
125+
; NOSHA3-NEXT: eor v1.16b, v1.16b, v2.16b
126+
; NOSHA3-NEXT: eor v0.16b, v1.16b, v0.16b
127+
; NOSHA3-NEXT: ret
128+
%4 = xor <2 x i64> %1, %2
129+
%5 = xor <2 x i64> %4, %0
130+
ret <2 x i64> %5
131+
}
132+
133+
; Negative case: the inner xor is against an all-ones constant (a vector NOT of
; %0) and the result is then xor'ed with %1. The expected output is the same
; eor + mvn pair under both +sha3 and -sha3, i.e. no eor3 is expected here.
; NOTE(review): presumably the NOT is handled separately before the EOR3
; pattern can apply -- confirm against the DAG combiner behaviour.
define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
134+
; SHA3-LABEL: eor3_vnot:
135+
; SHA3: // %bb.0:
136+
; SHA3-NEXT: eor v0.16b, v0.16b, v1.16b
137+
; SHA3-NEXT: mvn v0.16b, v0.16b
138+
; SHA3-NEXT: ret
139+
;
140+
; NOSHA3-LABEL: eor3_vnot:
141+
; NOSHA3: // %bb.0:
142+
; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
143+
; NOSHA3-NEXT: mvn v0.16b, v0.16b
144+
; NOSHA3-NEXT: ret
145+
%3 = xor <2 x i64> %0, <i64 -1, i64 -1>
146+
%4 = xor <2 x i64> %3, %1
147+
ret <2 x i64> %4
148+
}
149+

0 commit comments

Comments
 (0)