Skip to content

Commit 11c7135

Browse files
committed
[AArch64][GlobalISel] Legalize BSWAP for Vector Types
1 parent b643b1c commit 11c7135

File tree

7 files changed

+155
-93
lines changed

7 files changed

+155
-93
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9031,7 +9031,6 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
90319031
SDLoc dl(N);
90329032
EVT VT = N->getValueType(0);
90339033
SDValue Op = N->getOperand(0);
9034-
90359034
if (!VT.isSimple())
90369035
return SDValue();
90379036

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5163,6 +5163,13 @@ def : Pat<(v8i16 (concat_vectors
51635163
(v4i32 VImm8000)))))),
51645164
(SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
51655165

5166+
// Select BSWAP vector instructions into REV instructions
5167+
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))), (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
5168+
def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))), (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
5169+
def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))), (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
5170+
def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))), (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
5171+
def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))), (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
5172+
51665173
//===----------------------------------------------------------------------===//
51675174
// Advanced SIMD three vector instructions.
51685175
//===----------------------------------------------------------------------===//

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -2567,43 +2567,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
25672567
return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
25682568
}
25692569

2570-
case TargetOpcode::G_BSWAP: {
2571-
// Handle vector types for G_BSWAP directly.
2572-
Register DstReg = I.getOperand(0).getReg();
2573-
LLT DstTy = MRI.getType(DstReg);
2574-
2575-
// We should only get vector types here; everything else is handled by the
2576-
// importer right now.
2577-
if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2578-
LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2579-
return false;
2580-
}
2581-
2582-
// Only handle 4 and 2 element vectors for now.
2583-
// TODO: 16-bit elements.
2584-
unsigned NumElts = DstTy.getNumElements();
2585-
if (NumElts != 4 && NumElts != 2) {
2586-
LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2587-
return false;
2588-
}
2589-
2590-
// Choose the correct opcode for the supported types. Right now, that's
2591-
// v2s32, v4s32, and v2s64.
2592-
unsigned Opc = 0;
2593-
unsigned EltSize = DstTy.getElementType().getSizeInBits();
2594-
if (EltSize == 32)
2595-
Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2596-
: AArch64::REV32v16i8;
2597-
else if (EltSize == 64)
2598-
Opc = AArch64::REV64v16i8;
2599-
2600-
// We should always get something by the time we get here...
2601-
assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2602-
2603-
I.setDesc(TII.get(Opc));
2604-
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2605-
}
2606-
26072570
case TargetOpcode::G_FCONSTANT:
26082571
case TargetOpcode::G_CONSTANT: {
26092572
const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
118118
.clampMaxNumElements(0, p0, 2);
119119

120120
getActionDefinitionsBuilder(G_BSWAP)
121-
.legalFor({s32, s64, v4s32, v2s32, v2s64})
122-
.widenScalarToNextPow2(0)
123-
.clampScalar(0, s32, s64);
121+
.legalFor({s32, s64, v4s16, v8s16, v2s32, v4s32, v2s64})
122+
.widenScalarToNextPow2(0, 32)
123+
.clampScalar(0, s32, s64)
124+
.clampNumElements(0, v4s16, v8s16)
125+
.clampNumElements(0, v2s32, v4s32)
126+
.clampNumElements(0, v2s64, v2s64)
127+
.moreElementsToNextPow2(0);
124128

125129
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
126130
.legalFor({s32, s64, v2s32, v2s64, v4s32, v4s16, v8s16, v16s8, v8s8})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-bswap.mir

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ body: |
1111
1212
; CHECK-LABEL: name: bswap_s16
1313
; CHECK: liveins: $w0
14-
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
15-
; CHECK: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
16-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
17-
; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
18-
; CHECK: $w0 = COPY [[LSHR]](s32)
19-
; CHECK: RET_ReallyLR implicit $w0
14+
; CHECK-NEXT: {{ $}}
15+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
16+
; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(s32) = G_BSWAP [[COPY]]
17+
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
18+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BSWAP]], [[C]](s64)
19+
; CHECK-NEXT: $w0 = COPY [[LSHR]](s32)
20+
; CHECK-NEXT: RET_ReallyLR implicit $w0
2021
%1:_(s32) = COPY $w0
2122
%0:_(s16) = G_TRUNC %1(s32)
2223
%2:_(s16) = G_BSWAP %0
@@ -32,10 +33,11 @@ body: |
3233
liveins: $w0
3334
; CHECK-LABEL: name: bswap_s32_legal
3435
; CHECK: liveins: $w0
35-
; CHECK: %copy:_(s32) = COPY $w0
36-
; CHECK: %bswap:_(s32) = G_BSWAP %copy
37-
; CHECK: $w0 = COPY %bswap(s32)
38-
; CHECK: RET_ReallyLR implicit $w0
36+
; CHECK-NEXT: {{ $}}
37+
; CHECK-NEXT: %copy:_(s32) = COPY $w0
38+
; CHECK-NEXT: %bswap:_(s32) = G_BSWAP %copy
39+
; CHECK-NEXT: $w0 = COPY %bswap(s32)
40+
; CHECK-NEXT: RET_ReallyLR implicit $w0
3941
%copy:_(s32) = COPY $w0
4042
%bswap:_(s32) = G_BSWAP %copy
4143
$w0 = COPY %bswap(s32)
@@ -49,27 +51,65 @@ body: |
4951
liveins: $x0
5052
; CHECK-LABEL: name: bswap_s64_legal
5153
; CHECK: liveins: $x0
52-
; CHECK: %copy:_(s64) = COPY $x0
53-
; CHECK: %bswap:_(s64) = G_BSWAP %copy
54-
; CHECK: $x0 = COPY %bswap(s64)
55-
; CHECK: RET_ReallyLR implicit $x0
54+
; CHECK-NEXT: {{ $}}
55+
; CHECK-NEXT: %copy:_(s64) = COPY $x0
56+
; CHECK-NEXT: %bswap:_(s64) = G_BSWAP %copy
57+
; CHECK-NEXT: $x0 = COPY %bswap(s64)
58+
; CHECK-NEXT: RET_ReallyLR implicit $x0
5659
%copy:_(s64) = COPY $x0
5760
%bswap:_(s64) = G_BSWAP %copy
5861
$x0 = COPY %bswap(s64)
5962
RET_ReallyLR implicit $x0
6063
...
6164
---
65+
name: bswap_v4s16_legal
66+
tracksRegLiveness: true
67+
body: |
68+
bb.0:
69+
liveins: $d0
70+
; CHECK-LABEL: name: bswap_v4s16_legal
71+
; CHECK: liveins: $d0
72+
; CHECK-NEXT: {{ $}}
73+
; CHECK-NEXT: %copy:_(<4 x s16>) = COPY $d0
74+
; CHECK-NEXT: %bswap:_(<4 x s16>) = G_BSWAP %copy
75+
; CHECK-NEXT: $d0 = COPY %bswap(<4 x s16>)
76+
; CHECK-NEXT: RET_ReallyLR implicit $d0
77+
%copy:_(<4 x s16>) = COPY $d0
78+
%bswap:_(<4 x s16>) = G_BSWAP %copy
79+
$d0 = COPY %bswap(<4 x s16>)
80+
RET_ReallyLR implicit $d0
81+
...
82+
---
83+
name: bswap_v8s16_legal
84+
tracksRegLiveness: true
85+
body: |
86+
bb.0:
87+
liveins: $q0
88+
; CHECK-LABEL: name: bswap_v8s16_legal
89+
; CHECK: liveins: $q0
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: %copy:_(<8 x s16>) = COPY $q0
92+
; CHECK-NEXT: %bswap:_(<8 x s16>) = G_BSWAP %copy
93+
; CHECK-NEXT: $q0 = COPY %bswap(<8 x s16>)
94+
; CHECK-NEXT: RET_ReallyLR implicit $q0
95+
%copy:_(<8 x s16>) = COPY $q0
96+
%bswap:_(<8 x s16>) = G_BSWAP %copy
97+
$q0 = COPY %bswap(<8 x s16>)
98+
RET_ReallyLR implicit $q0
99+
...
100+
---
62101
name: bswap_v4s32_legal
63102
tracksRegLiveness: true
64103
body: |
65104
bb.0:
66105
liveins: $q0
67106
; CHECK-LABEL: name: bswap_v4s32_legal
68107
; CHECK: liveins: $q0
69-
; CHECK: %copy:_(<4 x s32>) = COPY $q0
70-
; CHECK: %bswap:_(<4 x s32>) = G_BSWAP %copy
71-
; CHECK: $q0 = COPY %bswap(<4 x s32>)
72-
; CHECK: RET_ReallyLR implicit $q0
108+
; CHECK-NEXT: {{ $}}
109+
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $q0
110+
; CHECK-NEXT: %bswap:_(<4 x s32>) = G_BSWAP %copy
111+
; CHECK-NEXT: $q0 = COPY %bswap(<4 x s32>)
112+
; CHECK-NEXT: RET_ReallyLR implicit $q0
73113
%copy:_(<4 x s32>) = COPY $q0
74114
%bswap:_(<4 x s32>) = G_BSWAP %copy
75115
$q0 = COPY %bswap(<4 x s32>)
@@ -83,10 +123,11 @@ body: |
83123
liveins: $d0
84124
; CHECK-LABEL: name: bswap_v2s32_legal
85125
; CHECK: liveins: $d0
86-
; CHECK: %copy:_(<2 x s32>) = COPY $d0
87-
; CHECK: %bswap:_(<2 x s32>) = G_BSWAP %copy
88-
; CHECK: $d0 = COPY %bswap(<2 x s32>)
89-
; CHECK: RET_ReallyLR implicit $d0
126+
; CHECK-NEXT: {{ $}}
127+
; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
128+
; CHECK-NEXT: %bswap:_(<2 x s32>) = G_BSWAP %copy
129+
; CHECK-NEXT: $d0 = COPY %bswap(<2 x s32>)
130+
; CHECK-NEXT: RET_ReallyLR implicit $d0
90131
%copy:_(<2 x s32>) = COPY $d0
91132
%bswap:_(<2 x s32>) = G_BSWAP %copy
92133
$d0 = COPY %bswap(<2 x s32>)
@@ -100,10 +141,11 @@ body: |
100141
liveins: $q0
101142
; CHECK-LABEL: name: bswap_v2s64_legal
102143
; CHECK: liveins: $q0
103-
; CHECK: %copy:_(<2 x s64>) = COPY $q0
104-
; CHECK: %bswap:_(<2 x s64>) = G_BSWAP %copy
105-
; CHECK: $q0 = COPY %bswap(<2 x s64>)
106-
; CHECK: RET_ReallyLR implicit $q0
144+
; CHECK-NEXT: {{ $}}
145+
; CHECK-NEXT: %copy:_(<2 x s64>) = COPY $q0
146+
; CHECK-NEXT: %bswap:_(<2 x s64>) = G_BSWAP %copy
147+
; CHECK-NEXT: $q0 = COPY %bswap(<2 x s64>)
148+
; CHECK-NEXT: RET_ReallyLR implicit $q0
107149
%copy:_(<2 x s64>) = COPY $q0
108150
%bswap:_(<2 x s64>) = G_BSWAP %copy
109151
$q0 = COPY %bswap(<2 x s64>)

llvm/test/CodeGen/AArch64/GlobalISel/select-bswap.mir

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,11 @@ body: |
1616
liveins: $w0
1717
1818
; CHECK-LABEL: name: bswap_s32
19-
; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
20-
; CHECK: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]]
21-
; CHECK: $w0 = COPY [[REVWr]]
19+
; CHECK: liveins: $w0
20+
; CHECK-NEXT: {{ $}}
21+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
22+
; CHECK-NEXT: [[REVWr:%[0-9]+]]:gpr32 = REVWr [[COPY]]
23+
; CHECK-NEXT: $w0 = COPY [[REVWr]]
2224
%0(s32) = COPY $w0
2325
%1(s32) = G_BSWAP %0
2426
$w0 = COPY %1
@@ -38,13 +40,62 @@ body: |
3840
liveins: $x0
3941
4042
; CHECK-LABEL: name: bswap_s64
41-
; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
42-
; CHECK: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]]
43-
; CHECK: $x0 = COPY [[REVXr]]
43+
; CHECK: liveins: $x0
44+
; CHECK-NEXT: {{ $}}
45+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
46+
; CHECK-NEXT: [[REVXr:%[0-9]+]]:gpr64 = REVXr [[COPY]]
47+
; CHECK-NEXT: $x0 = COPY [[REVXr]]
4448
%0(s64) = COPY $x0
4549
%1(s64) = G_BSWAP %0
4650
$x0 = COPY %1
4751
52+
...
53+
---
54+
name: bswap_v4s16
55+
alignment: 4
56+
legalized: true
57+
regBankSelected: true
58+
tracksRegLiveness: true
59+
machineFunctionInfo: {}
60+
body: |
61+
bb.0:
62+
liveins: $d0
63+
64+
; CHECK-LABEL: name: bswap_v4s16
65+
; CHECK: liveins: $d0
66+
; CHECK-NEXT: {{ $}}
67+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
68+
; CHECK-NEXT: [[REV16v8i8_:%[0-9]+]]:fpr64 = REV16v8i8 [[COPY]]
69+
; CHECK-NEXT: $d0 = COPY [[REV16v8i8_]]
70+
; CHECK-NEXT: RET_ReallyLR implicit $d0
71+
%0:fpr(<4 x s16>) = COPY $d0
72+
%1:fpr(<4 x s16>) = G_BSWAP %0
73+
$d0 = COPY %1(<4 x s16>)
74+
RET_ReallyLR implicit $d0
75+
76+
...
77+
---
78+
name: bswap_v8s16
79+
alignment: 4
80+
legalized: true
81+
regBankSelected: true
82+
tracksRegLiveness: true
83+
machineFunctionInfo: {}
84+
body: |
85+
bb.0:
86+
liveins: $q0
87+
; CHECK-LABEL: name: bswap_v8s16
88+
; CHECK: liveins: $q0
89+
; CHECK-NEXT: {{ $}}
90+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
91+
; CHECK-NEXT: [[REV16v16i8_:%[0-9]+]]:fpr128 = REV16v16i8 [[COPY]]
92+
; CHECK-NEXT: $q0 = COPY [[REV16v16i8_]]
93+
; CHECK-NEXT: RET_ReallyLR implicit $q0
94+
%0:fpr(<8 x s16>) = COPY $q0
95+
%1:fpr(<8 x s16>) = G_BSWAP %0
96+
$q0 = COPY %1(<8 x s16>)
97+
RET_ReallyLR implicit $q0
98+
4899
...
49100
---
50101
name: bswap_v4s32
@@ -59,10 +110,11 @@ body: |
59110
60111
; CHECK-LABEL: name: bswap_v4s32
61112
; CHECK: liveins: $q0
62-
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
63-
; CHECK: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
64-
; CHECK: $q0 = COPY [[REV32v16i8_]]
65-
; CHECK: RET_ReallyLR implicit $q0
113+
; CHECK-NEXT: {{ $}}
114+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
115+
; CHECK-NEXT: [[REV32v16i8_:%[0-9]+]]:fpr128 = REV32v16i8 [[COPY]]
116+
; CHECK-NEXT: $q0 = COPY [[REV32v16i8_]]
117+
; CHECK-NEXT: RET_ReallyLR implicit $q0
66118
%0:fpr(<4 x s32>) = COPY $q0
67119
%1:fpr(<4 x s32>) = G_BSWAP %0
68120
$q0 = COPY %1(<4 x s32>)
@@ -82,10 +134,11 @@ body: |
82134
83135
; CHECK-LABEL: name: bswap_v2s32
84136
; CHECK: liveins: $d0
85-
; CHECK: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
86-
; CHECK: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
87-
; CHECK: $d0 = COPY [[REV32v8i8_]]
88-
; CHECK: RET_ReallyLR implicit $d0
137+
; CHECK-NEXT: {{ $}}
138+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY $d0
139+
; CHECK-NEXT: [[REV32v8i8_:%[0-9]+]]:fpr64 = REV32v8i8 [[COPY]]
140+
; CHECK-NEXT: $d0 = COPY [[REV32v8i8_]]
141+
; CHECK-NEXT: RET_ReallyLR implicit $d0
89142
%0:fpr(<2 x s32>) = COPY $d0
90143
%1:fpr(<2 x s32>) = G_BSWAP %0
91144
$d0 = COPY %1(<2 x s32>)
@@ -105,10 +158,11 @@ body: |
105158
106159
; CHECK-LABEL: name: bswap_v2s64
107160
; CHECK: liveins: $q0
108-
; CHECK: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
109-
; CHECK: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
110-
; CHECK: $q0 = COPY [[REV64v16i8_]]
111-
; CHECK: RET_ReallyLR implicit $q0
161+
; CHECK-NEXT: {{ $}}
162+
; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr128 = COPY $q0
163+
; CHECK-NEXT: [[REV64v16i8_:%[0-9]+]]:fpr128 = REV64v16i8 [[COPY]]
164+
; CHECK-NEXT: $q0 = COPY [[REV64v16i8_]]
165+
; CHECK-NEXT: RET_ReallyLR implicit $q0
112166
%0:fpr(<2 x s64>) = COPY $q0
113167
%1:fpr(<2 x s64>) = G_BSWAP %0
114168
$q0 = COPY %1(<2 x s64>)

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,7 @@
22
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
33
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
44

5-
; CHECK-GI: warning: Instruction selection used fallback path for bswap_v8i16
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v2i16
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v16i16
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v8i32
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v4i64
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v3i16
11-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v7i16
12-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bswap_v3i32
5+
; CHECK-GI: warning: Instruction selection used fallback path for bswap_v2i16
136

147
; ====== Scalar Tests =====
158
define i16 @bswap_i16(i16 %a){

0 commit comments

Comments
 (0)