Skip to content

Commit da9559d

Browse files
authored
Do not use PerformEXTRACTCombine for v8i8 types (#81242)
Same as with v4i8 types, we should not be using PerformEXTRACTCombine for v8i8 types.
1 parent 32e65b0 commit da9559d

File tree

2 files changed

+51
-48
lines changed

2 files changed

+51
-48
lines changed

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5354,10 +5354,11 @@ static SDValue PerformEXTRACTCombine(SDNode *N,
53545354
if (Vector->getOpcode() == ISD::LOAD && VectorVT.isSimple() &&
53555355
IsPTXVectorType(VectorVT.getSimpleVT()))
53565356
return SDValue(); // Native vector loads already combine nicely w/
5357-
// extract_vector_elt, except for v4i8.
5358-
// Don't mess with singletons or v2*16 types, we already handle them OK.
5357+
// extract_vector_elt.
5358+
// Don't mess with singletons or v2*16, v4i8 and v8i8 types, we already
5359+
// handle them OK.
53595360
if (VectorVT.getVectorNumElements() == 1 || Isv2x16VT(VectorVT) ||
5360-
VectorVT == MVT::v4i8)
5361+
VectorVT == MVT::v4i8 || VectorVT == MVT::v8i8)
53615362
return SDValue();
53625363

53635364
uint64_t VectorBits = VectorVT.getSizeInBits();

llvm/test/CodeGen/NVPTX/extractelement.ll

Lines changed: 47 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,9 @@
33

44

55
; CHECK-LABEL: test_v2i8
6-
; CHECK-DAG: ld.param.u16 [[A:%rs[0-9+]]], [test_v2i8_param_0];
7-
; CHECK-DAG: cvt.s16.s8 [[E0:%rs[0-9+]]], [[A]];
8-
; CHECK-DAG: shr.s16 [[E1:%rs[0-9+]]], [[A]], 8;
6+
; CHECK-DAG: ld.param.u16 [[A:%rs[0-9]+]], [test_v2i8_param_0];
7+
; CHECK-DAG: cvt.s16.s8 [[E0:%rs[0-9]+]], [[A]];
8+
; CHECK-DAG: shr.s16 [[E1:%rs[0-9]+]], [[A]], 8;
99
define i16 @test_v2i8(i16 %a) {
1010
%v = bitcast i16 %a to <2 x i8>
1111
%r0 = extractelement <2 x i8> %v, i64 0
@@ -17,15 +17,15 @@ define i16 @test_v2i8(i16 %a) {
1717
}
1818

1919
; CHECK-LABEL: test_v4i8
20-
; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_param_0];
21-
; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8;
22-
; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9+]]], [[R0]];
23-
; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8;
24-
; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9+]]], [[R1]];
25-
; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8;
26-
; CHECK-DAG: cvt.s8.s32 [[E2:%rs[0-9+]]], [[R2]];
27-
; CHECK-DAG: bfe.s32 [[R3:%r[0-9+]]], [[R]], 24, 8;
28-
; CHECK-DAG: cvt.s8.s32 [[E3:%rs[0-9+]]], [[R3]];
20+
; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_param_0];
21+
; CHECK-DAG: bfe.s32 [[R0:%r[0-9]+]], [[R]], 0, 8;
22+
; CHECK-DAG: cvt.s8.s32 [[E0:%rs[0-9]+]], [[R0]];
23+
; CHECK-DAG: bfe.s32 [[R1:%r[0-9]+]], [[R]], 8, 8;
24+
; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9]+]], [[R1]];
25+
; CHECK-DAG: bfe.s32 [[R2:%r[0-9]+]], [[R]], 16, 8;
26+
; CHECK-DAG: cvt.s8.s32 [[E2:%rs[0-9]+]], [[R2]];
27+
; CHECK-DAG: bfe.s32 [[R3:%r[0-9]+]], [[R]], 24, 8;
28+
; CHECK-DAG: cvt.s8.s32 [[E3:%rs[0-9]+]], [[R3]];
2929
define i16 @test_v4i8(i32 %a) {
3030
%v = bitcast i32 %a to <4 x i8>
3131
%r0 = extractelement <4 x i8> %v, i64 0
@@ -43,14 +43,14 @@ define i16 @test_v4i8(i32 %a) {
4343
}
4444

4545
; CHECK-LABEL: test_v4i8_s32
46-
; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_s32_param_0];
47-
; CHECK-DAG: bfe.s32 [[R0:%r[0-9+]]], [[R]], 0, 8;
48-
; CHECK-DAG: bfe.s32 [[R1:%r[0-9+]]], [[R]], 8, 8;
49-
; CHECK-DAG: bfe.s32 [[R2:%r[0-9+]]], [[R]], 16, 8;
50-
; CHECK-DAG: bfe.s32 [[R3:%r[0-9+]]], [[R]], 24, 8;
51-
; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]]
52-
; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]]
53-
; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]]
46+
; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_s32_param_0];
47+
; CHECK-DAG: bfe.s32 [[R0:%r[0-9]+]], [[R]], 0, 8;
48+
; CHECK-DAG: bfe.s32 [[R1:%r[0-9]+]], [[R]], 8, 8;
49+
; CHECK-DAG: bfe.s32 [[R2:%r[0-9]+]], [[R]], 16, 8;
50+
; CHECK-DAG: bfe.s32 [[R3:%r[0-9]+]], [[R]], 24, 8;
51+
; CHECK-DAG: add.s32 [[R01:%r[0-9]+]], [[R0]], [[R1]]
52+
; CHECK-DAG: add.s32 [[R23:%r[0-9]+]], [[R2]], [[R3]]
53+
; CHECK-DAG: add.s32 [[R0123:%r[0-9]+]], [[R01]], [[R23]]
5454
define i32 @test_v4i8_s32(i32 %a) {
5555
%v = bitcast i32 %a to <4 x i8>
5656
%r0 = extractelement <4 x i8> %v, i64 0
@@ -68,14 +68,14 @@ define i32 @test_v4i8_s32(i32 %a) {
6868
}
6969

7070
; CHECK-LABEL: test_v4i8_u32
71-
; CHECK: ld.param.u32 [[R:%r[0-9+]]], [test_v4i8_u32_param_0];
72-
; CHECK-DAG: bfe.u32 [[R0:%r[0-9+]]], [[R]], 0, 8;
73-
; CHECK-DAG: bfe.u32 [[R1:%r[0-9+]]], [[R]], 8, 8;
74-
; CHECK-DAG: bfe.u32 [[R2:%r[0-9+]]], [[R]], 16, 8;
75-
; CHECK-DAG: bfe.u32 [[R3:%r[0-9+]]], [[R]], 24, 8;
76-
; CHECK-DAG: add.s32 [[R01:%r[0-9+]]], [[R0]], [[R1]]
77-
; CHECK-DAG: add.s32 [[R23:%r[0-9+]]], [[R2]], [[R3]]
78-
; CHECK-DAG: add.s32 [[R0123:%r[0-9+]]], [[R01]], [[R23]]
71+
; CHECK: ld.param.u32 [[R:%r[0-9]+]], [test_v4i8_u32_param_0];
72+
; CHECK-DAG: bfe.u32 [[R0:%r[0-9]+]], [[R]], 0, 8;
73+
; CHECK-DAG: bfe.u32 [[R1:%r[0-9]+]], [[R]], 8, 8;
74+
; CHECK-DAG: bfe.u32 [[R2:%r[0-9]+]], [[R]], 16, 8;
75+
; CHECK-DAG: bfe.u32 [[R3:%r[0-9]+]], [[R]], 24, 8;
76+
; CHECK-DAG: add.s32 [[R01:%r[0-9]+]], [[R0]], [[R1]]
77+
; CHECK-DAG: add.s32 [[R23:%r[0-9]+]], [[R2]], [[R3]]
78+
; CHECK-DAG: add.s32 [[R0123:%r[0-9]+]], [[R01]], [[R23]]
7979
define i32 @test_v4i8_u32(i32 %a) {
8080
%v = bitcast i32 %a to <4 x i8>
8181
%r0 = extractelement <4 x i8> %v, i64 0
@@ -95,23 +95,25 @@ define i32 @test_v4i8_u32(i32 %a) {
9595

9696

9797
; CHECK-LABEL: test_v8i8
98-
; CHECK: ld.param.u64 [[R:%rd[0-9+]]], [test_v8i8_param_0];
99-
; CHECK-DAG: cvt.s8.s64 [[E0:%rs[0-9+]]], [[R]];
100-
; Element 1 is still extracted by trunc, shr 8, not sure why.
101-
; CHECK-DAG: cvt.u16.u64 [[R01:%rs[0-9+]]], [[R]];
102-
; CHECK-DAG: shr.s16 [[E1:%rs[0-9+]]], [[R01]], 8;
103-
; CHECK-DAG: bfe.s64 [[RD2:%rd[0-9+]]], [[R]], 16, 8;
104-
; CHECK-DAG: cvt.s8.s64 [[E2:%rs[0-9+]]], [[RD2]];
105-
; CHECK-DAG: bfe.s64 [[RD3:%rd[0-9+]]], [[R]], 24, 8;
106-
; CHECK-DAG: cvt.s8.s64 [[E3:%rs[0-9+]]], [[RD3]];
107-
; CHECK-DAG: bfe.s64 [[RD4:%rd[0-9+]]], [[R]], 32, 8;
108-
; CHECK-DAG: cvt.s8.s64 [[E4:%rs[0-9+]]], [[RD4]];
109-
; CHECK-DAG: bfe.s64 [[RD5:%rd[0-9+]]], [[R]], 40, 8;
110-
; CHECK-DAG: cvt.s8.s64 [[E5:%rs[0-9+]]], [[RD5]];
111-
; CHECK-DAG: bfe.s64 [[RD6:%rd[0-9+]]], [[R]], 48, 8;
112-
; CHECK-DAG: cvt.s8.s64 [[E6:%rs[0-9+]]], [[RD6]];
113-
; CHECK-DAG: bfe.s64 [[RD7:%rd[0-9+]]], [[R]], 56, 8;
114-
; CHECK-DAG: cvt.s8.s64 [[E7:%rs[0-9+]]], [[RD7]];
98+
; CHECK: ld.param.u64 [[R:%rd[0-9]+]], [test_v8i8_param_0];
99+
; CHECK-DAG: cvt.u32.u64 [[R00:%r[0-9]+]], [[R]];
100+
; CHECK-DAG: { .reg .b32 tmp; mov.b64 {tmp, [[R01:%r[0-9]+]]}, [[R]]; }
101+
; CHECK-DAG: bfe.s32 [[R1:%r[0-9]+]], [[R00]], 0, 8;
102+
; CHECK-DAG: cvt.s8.s32 [[E1:%rs[0-9]+]], [[R1]];
103+
; CHECK-DAG: bfe.s32 [[R2:%r[0-9]+]], [[R00]], 8, 8;
104+
; CHECK-DAG: cvt.s8.s32 [[E2:%rs[0-9]+]], [[R2]];
105+
; CHECK-DAG: bfe.s32 [[R3:%r[0-9]+]], [[R00]], 16, 8;
106+
; CHECK-DAG: cvt.s8.s32 [[E3:%rs[0-9]+]], [[R3]];
107+
; CHECK-DAG: bfe.s32 [[R4:%r[0-9]+]], [[R00]], 24, 8;
108+
; CHECK-DAG: cvt.s8.s32 [[E4:%rs[0-9]+]], [[R4]];
109+
; CHECK-DAG: bfe.s32 [[R5:%r[0-9]+]], [[R01]], 0, 8;
110+
; CHECK-DAG: cvt.s8.s32 [[E5:%rs[0-9]+]], [[R5]];
111+
; CHECK-DAG: bfe.s32 [[R6:%r[0-9]+]], [[R01]], 8, 8;
112+
; CHECK-DAG: cvt.s8.s32 [[E6:%rs[0-9]+]], [[R6]];
113+
; CHECK-DAG: bfe.s32 [[R7:%r[0-9]+]], [[R01]], 16, 8;
114+
; CHECK-DAG: cvt.s8.s32 [[E7:%rs[0-9]+]], [[R7]];
115+
; CHECK-DAG: bfe.s32 [[R8:%r[0-9]+]], [[R01]], 24, 8;
116+
; CHECK-DAG: cvt.s8.s32 [[E8:%rs[0-9]+]], [[R8]];
115117

116118
define i16 @test_v8i8(i64 %a) {
117119
%v = bitcast i64 %a to <8 x i8>

0 commit comments

Comments
 (0)