Skip to content

Commit 5db0b25

Browse files
committed
[AMDGPU][GlobalISel] Handle 64-byte EltSize in getRegSplitParts
Reviewers: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D74518
1 parent a0d913a commit 5db0b25

File tree

2 files changed

+49
-14
lines changed

2 files changed

+49
-14
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1772,27 +1772,42 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
17721772
}
17731773
}
17741774

1775-
assert(EltSize == 32 && "unhandled elt size");
1775+
if (EltSize == 32) {
1776+
static const int16_t Sub0_31_256[] = {
1777+
AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1778+
AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1779+
AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1780+
AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1781+
};
17761782

1777-
static const int16_t Sub0_31_256[] = {
1778-
AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1779-
AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1780-
AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23,
1781-
AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
1782-
};
1783+
static const int16_t Sub0_15_256[] = {
1784+
AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1785+
AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1786+
};
1787+
1788+
switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1789+
case 256:
1790+
return {};
1791+
case 512:
1792+
return makeArrayRef(Sub0_15_256);
1793+
case 1024:
1794+
return makeArrayRef(Sub0_31_256);
1795+
default:
1796+
llvm_unreachable("unhandled register size");
1797+
}
1798+
}
17831799

1784-
static const int16_t Sub0_15_256[] = {
1785-
AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7,
1786-
AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15
1800+
assert(EltSize == 64 && "unhandled elt size");
1801+
static const int16_t Sub0_31_512[] = {
1802+
AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15,
1803+
AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
17871804
};
17881805

17891806
switch (AMDGPU::getRegBitWidth(*RC->MC)) {
1790-
case 256:
1791-
return {};
17921807
case 512:
1793-
return makeArrayRef(Sub0_15_256);
1808+
return {};
17941809
case 1024:
1795-
return makeArrayRef(Sub0_31_256);
1810+
return makeArrayRef(Sub0_31_512);
17961811
default:
17971812
llvm_unreachable("unhandled register size");
17981813
}

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-concat-vectors.mir

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,6 +586,26 @@ body: |
586586
$vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %8
587587
...
588588

589+
---
590+
name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32
591+
legalized: true
592+
regBankSelected: true
593+
594+
body: |
595+
bb.0:
596+
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
597+
598+
; GCN-LABEL: name: test_concat_vectors_s_v32s32_s_v16s32_s_v16s32
599+
; GCN: [[COPY:%[0-9]+]]:sreg_512 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
600+
; GCN: [[COPY1:%[0-9]+]]:sreg_512 = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
601+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_1024 = REG_SEQUENCE [[COPY]], %subreg.sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, [[COPY1]], %subreg.sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31
602+
; GCN: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY [[REG_SEQUENCE]]
603+
%0:sgpr(<16 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
604+
%1:sgpr(<16 x s32>) = COPY $sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31
605+
%2:sgpr(<32 x s32>) = G_CONCAT_VECTORS %0, %1
606+
$sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 = COPY %2
607+
...
608+
589609
---
590610
name: test_concat_vectors_s_v4s64_s_v2s64_s_v2s64
591611
legalized: true

0 commit comments

Comments
 (0)