Skip to content

Commit 3e9e860

Browse files
committed
[AArch64][GlobalISel] Legalize ptr shuffle vector to s64
This converts all shuffle vectors with pointer elements to use s64 elements, so that the existing vector legalization handling can lower them as needed. I'm not sure whether bitcast/inttoptr/ptrtoint are intended to be necessary for vectors of pointers, but the casts are created with buildCast, which now generates ptrtoint/inttoptr for pointer vectors.
1 parent 42da815 commit 3e9e860

File tree

8 files changed

+78
-32
lines changed

8 files changed

+78
-32
lines changed

llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ class LegalizerHelper {
378378
LLT CastTy);
379379
LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
380380
LLT CastTy);
381+
LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
382+
LLT CastTy);
381383
LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
382384
LLT CastTy);
383385
LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3697,6 +3697,41 @@ LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
36973697
return Legalized;
36983698
}
36993699

3700+
// This bitcasts a shuffle vector to a different type currently of the same
3701+
// element size. Mostly used to legalize ptr vectors, where ptrtoint/inttoptr
3702+
// will be used instead.
3703+
//
3704+
// <16 x p0> = G_CONCAT_VECTORS <4 x p0>, <4 x p0>, mask
3705+
// ===>
3706+
// <4 x s64> = G_PTRTOINT <4 x p0>
3707+
// <4 x s64> = G_PTRTOINT <4 x p0>
3708+
// <16 x s64> = G_CONCAT_VECTORS <4 x s64>, <4 x s64>, mask
3709+
// <16 x p0> = G_INTTOPTR <16 x s64>
3710+
LegalizerHelper::LegalizeResult
3711+
LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3712+
LLT CastTy) {
3713+
auto ShuffleMI = dyn_cast<GShuffleVector>(&MI);
3714+
LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3715+
LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3716+
3717+
// We currently only handle vectors of the same size.
3718+
if (TypeIdx != 0 ||
3719+
CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3720+
CastTy.getElementCount() != DstTy.getElementCount())
3721+
return UnableToLegalize;
3722+
3723+
LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3724+
3725+
auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3726+
auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3727+
auto Shuf =
3728+
MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3729+
MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3730+
3731+
MI.eraseFromParent();
3732+
return Legalized;
3733+
}
3734+
37003735
/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
37013736
///
37023737
/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
@@ -4133,6 +4168,8 @@ LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
41334168
return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
41344169
case TargetOpcode::G_CONCAT_VECTORS:
41354170
return bitcastConcatVector(MI, TypeIdx, CastTy);
4171+
case TargetOpcode::G_SHUFFLE_VECTOR:
4172+
return bitcastShuffleVector(MI, TypeIdx, CastTy);
41364173
case TargetOpcode::G_EXTRACT_SUBVECTOR:
41374174
return bitcastExtractSubvector(MI, TypeIdx, CastTy);
41384175
case TargetOpcode::G_INSERT_SUBVECTOR:

llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -600,12 +600,13 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
600600
return buildCopy(Dst, Src);
601601

602602
unsigned Opcode;
603-
if (SrcTy.isPointer() && DstTy.isScalar())
603+
if (SrcTy.isPointerOrPointerVector())
604604
Opcode = TargetOpcode::G_PTRTOINT;
605-
else if (DstTy.isPointer() && SrcTy.isScalar())
605+
else if (DstTy.isPointerOrPointerVector())
606606
Opcode = TargetOpcode::G_INTTOPTR;
607607
else {
608-
assert(!SrcTy.isPointer() && !DstTy.isPointer() && "n G_ADDRCAST yet");
608+
assert(!SrcTy.isPointerOrPointerVector() &&
609+
!DstTy.isPointerOrPointerVector() && "no G_ADDRCAST yet");
609610
Opcode = TargetOpcode::G_BITCAST;
610611
}
611612

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -838,13 +838,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
838838
getActionDefinitionsBuilder(G_PTRTOINT)
839839
.legalFor({{s64, p0}, {v2s64, v2p0}})
840840
.widenScalarToNextPow2(0, 64)
841-
.clampScalar(0, s64, s64);
841+
.clampScalar(0, s64, s64)
842+
.clampMaxNumElements(0, s64, 2);
842843

843844
getActionDefinitionsBuilder(G_INTTOPTR)
844845
.unsupportedIf([&](const LegalityQuery &Query) {
845846
return Query.Types[0].getSizeInBits() != Query.Types[1].getSizeInBits();
846847
})
847-
.legalFor({{p0, s64}, {v2p0, v2s64}});
848+
.legalFor({{p0, s64}, {v2p0, v2s64}})
849+
.clampMaxNumElements(1, s64, 2);
848850

849851
// Casts for 32 and 64-bit width type are just copies.
850852
// Same for 128-bit width type, except they are on the FPR bank.
@@ -1051,7 +1053,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10511053
if (DstTy != SrcTy)
10521054
return false;
10531055
return llvm::is_contained(
1054-
{v2s64, v2p0, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
1056+
{v2s64, v2s32, v4s32, v4s16, v16s8, v8s8, v8s16}, DstTy);
10551057
})
10561058
// G_SHUFFLE_VECTOR can have scalar sources (from 1 x s vectors), we
10571059
// just want those lowered into G_BUILD_VECTOR
@@ -1077,7 +1079,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
10771079
.clampNumElements(0, v8s8, v16s8)
10781080
.clampNumElements(0, v4s16, v8s16)
10791081
.clampNumElements(0, v4s32, v4s32)
1080-
.clampNumElements(0, v2s64, v2s64);
1082+
.clampNumElements(0, v2s64, v2s64)
1083+
.bitcastIf(
1084+
// Bitcast pointer vectors to vectors of s64.
1085+
[=](const LegalityQuery &Query) {
1086+
return Query.Types[0].isPointerVector();
1087+
},
1088+
[=](const LegalityQuery &Query) {
1089+
const LLT DstTy = Query.Types[0];
1090+
return std::pair(0, LLT::vector(DstTy.getElementCount(), 64));
1091+
});
10811092

10821093
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
10831094
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}})

llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@ body: |
5959
; CHECK-NEXT: {{ $}}
6060
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x p0>) = COPY $q0
6161
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x p0>) = COPY $q1
62-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x p0>) = G_SHUFFLE_VECTOR [[COPY]](<2 x p0>), [[COPY1]], shufflemask(0, 0)
63-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<2 x p0>)
62+
; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY]](<2 x p0>)
63+
; CHECK-NEXT: [[PTRTOINT1:%[0-9]+]]:_(<2 x s64>) = G_PTRTOINT [[COPY1]](<2 x p0>)
64+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[PTRTOINT]](<2 x s64>), [[PTRTOINT1]], shufflemask(0, 0)
65+
; CHECK-NEXT: [[INTTOPTR:%[0-9]+]]:_(<2 x p0>) = G_INTTOPTR [[SHUF]](<2 x s64>)
66+
; CHECK-NEXT: $q0 = COPY [[INTTOPTR]](<2 x p0>)
6467
; CHECK-NEXT: RET_ReallyLR implicit $q0
6568
%0:_(<2 x p0>) = COPY $q0
6669
%1:_(<2 x p0>) = COPY $q1

llvm/test/CodeGen/AArch64/arm64-ext.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
22
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=0 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_v2p0
3+
; RUN: llc < %s -mtriple=arm64-eabi -global-isel=1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
64

75
define <8 x i8> @test_vextd(<8 x i8> %tmp1, <8 x i8> %tmp2) {
86
; CHECK-LABEL: test_vextd:

llvm/test/CodeGen/AArch64/neon-perm.ll

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for test_vuzp1q_p0
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vuzp2q_p0
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip1q_p0
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vzip2q_p0
9-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn1q_p0
10-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vtrn2q_p0
3+
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
114

125
%struct.int8x8x2_t = type { [2 x <8 x i8>] }
136
%struct.int16x4x2_t = type { [2 x <4 x i16>] }

llvm/test/CodeGen/AArch64/shufflevector.ll

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22
; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
3-
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
4-
5-
; CHECK-GI: warning: Instruction selection used fallback path for shufflevector_v2p0
6-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v2p0_zeroes
7-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0
8-
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shufflevector_v4p0_zeroes
3+
; RUN: llc -mtriple=aarch64-none-linux-gnu -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
94

105
; ===== Legal Vector Types =====
116

@@ -393,12 +388,18 @@ define <4 x i64> @shufflevector_v4i64(<4 x i64> %a, <4 x i64> %b) {
393388
}
394389

395390
define <4 x ptr> @shufflevector_v4p0(<4 x ptr> %a, <4 x ptr> %b) {
396-
; CHECK-LABEL: shufflevector_v4p0:
397-
; CHECK: // %bb.0:
398-
; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d
399-
; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d
400-
; CHECK-NEXT: mov v1.16b, v2.16b
401-
; CHECK-NEXT: ret
391+
; CHECK-SD-LABEL: shufflevector_v4p0:
392+
; CHECK-SD: // %bb.0:
393+
; CHECK-SD-NEXT: zip2 v2.2d, v2.2d, v3.2d
394+
; CHECK-SD-NEXT: zip2 v0.2d, v0.2d, v1.2d
395+
; CHECK-SD-NEXT: mov v1.16b, v2.16b
396+
; CHECK-SD-NEXT: ret
397+
;
398+
; CHECK-GI-LABEL: shufflevector_v4p0:
399+
; CHECK-GI: // %bb.0:
400+
; CHECK-GI-NEXT: zip2 v0.2d, v0.2d, v1.2d
401+
; CHECK-GI-NEXT: zip2 v1.2d, v2.2d, v3.2d
402+
; CHECK-GI-NEXT: ret
402403
%c = shufflevector <4 x ptr> %a, <4 x ptr> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
403404
ret <4 x ptr> %c
404405
}

0 commit comments

Comments
 (0)