Skip to content

Commit 2f3dd69

Browse files
committed
Review Comments
Change-Id: Ic832bfe2e1b802c4dbbda8b0774d78bcb3aa0180
1 parent 0929ba9 commit 2f3dd69

File tree

3 files changed: 7 additions and 100 deletions.

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,10 +1135,13 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11351135
if (IsExtractSubvector)
11361136
Kind = TTI::SK_PermuteSingleSrc;
11371137

1138+
if (!isa<FixedVectorType>(VT))
1139+
return BaseT::getShuffleCost(Kind, VT, Mask, CostKind, Index, SubTp);
1140+
11381141
unsigned NumVectorElts = cast<FixedVectorType>(VT)->getNumElements();
11391142

11401143
if (ST->hasVOP3PInsts()) {
1141-
if (!(cast<FixedVectorType>(VT)->getNumElements() % 2) &&
1144+
if (!(NumVectorElts % 2) &&
11421145
DL.getTypeSizeInBits(VT->getElementType()) == 16) {
11431146
// With op_sel VOP3P instructions freely can access the low half or high
11441147
// half of a register, so any swizzle is free.
@@ -1153,7 +1156,7 @@ InstructionCost GCNTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
11531156
// typically cheaper than scalarized versions.
11541157
unsigned RequestedElts =
11551158
count_if(Mask, [](int MaskElt) { return MaskElt != -1; });
1156-
return RequestedElts / 2 + RequestedElts % 2;
1159+
return alignTo(RequestedElts, 2) / 2;
11571160
}
11581161
default:
11591162
break;

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
33
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4-
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GFX9 %s
4+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
55

66
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
77
; GFX7-LABEL: @uadd_sat_v2i16(
@@ -156,18 +156,6 @@ define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
156156
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
157157
; GCN-NEXT: ret <2 x i32> [[INS_1]]
158158
;
159-
; GFX9-LABEL: @uadd_sat_v2i32(
160-
; GFX9-NEXT: bb:
161-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
162-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
163-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
164-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
165-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
166-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
167-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
168-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
169-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
170-
;
171159
bb:
172160
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
173161
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -193,18 +181,6 @@ define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
193181
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
194182
; GCN-NEXT: ret <2 x i32> [[INS_1]]
195183
;
196-
; GFX9-LABEL: @usub_sat_v2i32(
197-
; GFX9-NEXT: bb:
198-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
199-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
200-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
201-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
202-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
203-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
204-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
205-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
206-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
207-
;
208184
bb:
209185
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
210186
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -230,18 +206,6 @@ define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
230206
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
231207
; GCN-NEXT: ret <2 x i32> [[INS_1]]
232208
;
233-
; GFX9-LABEL: @sadd_sat_v2i32(
234-
; GFX9-NEXT: bb:
235-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
236-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
237-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
238-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
239-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
240-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
241-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
242-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
243-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
244-
;
245209
bb:
246210
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
247211
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -267,18 +231,6 @@ define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
267231
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
268232
; GCN-NEXT: ret <2 x i32> [[INS_1]]
269233
;
270-
; GFX9-LABEL: @ssub_sat_v2i32(
271-
; GFX9-NEXT: bb:
272-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
273-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
274-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
275-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
276-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
277-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
278-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
279-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
280-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
281-
;
282234
bb:
283235
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
284236
%arg0.1 = extractelement <2 x i32> %arg0, i64 1

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 1 addition & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
22
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
33
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
4-
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GFX9 %s
4+
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,instcombine %s | FileCheck -check-prefixes=GCN,GFX9 %s
55

66
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
77
; GFX7-LABEL: @uadd_sat_v2i16(
@@ -156,18 +156,6 @@ define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
156156
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
157157
; GCN-NEXT: ret <2 x i32> [[INS_1]]
158158
;
159-
; GFX9-LABEL: @uadd_sat_v2i32(
160-
; GFX9-NEXT: bb:
161-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
162-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
163-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
164-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
165-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
166-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
167-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
168-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
169-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
170-
;
171159
bb:
172160
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
173161
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -193,18 +181,6 @@ define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
193181
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
194182
; GCN-NEXT: ret <2 x i32> [[INS_1]]
195183
;
196-
; GFX9-LABEL: @usub_sat_v2i32(
197-
; GFX9-NEXT: bb:
198-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
199-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
200-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
201-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
202-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
203-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
204-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
205-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
206-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
207-
;
208184
bb:
209185
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
210186
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -230,18 +206,6 @@ define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
230206
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
231207
; GCN-NEXT: ret <2 x i32> [[INS_1]]
232208
;
233-
; GFX9-LABEL: @sadd_sat_v2i32(
234-
; GFX9-NEXT: bb:
235-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
236-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
237-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
238-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
239-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
240-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
241-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
242-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
243-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
244-
;
245209
bb:
246210
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
247211
%arg0.1 = extractelement <2 x i32> %arg0, i64 1
@@ -267,18 +231,6 @@ define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
267231
; GCN-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
268232
; GCN-NEXT: ret <2 x i32> [[INS_1]]
269233
;
270-
; GFX9-LABEL: @ssub_sat_v2i32(
271-
; GFX9-NEXT: bb:
272-
; GFX9-NEXT: [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
273-
; GFX9-NEXT: [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
274-
; GFX9-NEXT: [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
275-
; GFX9-NEXT: [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
276-
; GFX9-NEXT: [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
277-
; GFX9-NEXT: [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
278-
; GFX9-NEXT: [[INS_0:%.*]] = insertelement <2 x i32> poison, i32 [[ADD_0]], i64 0
279-
; GFX9-NEXT: [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
280-
; GFX9-NEXT: ret <2 x i32> [[INS_1]]
281-
;
282234
bb:
283235
%arg0.0 = extractelement <2 x i32> %arg0, i64 0
284236
%arg0.1 = extractelement <2 x i32> %arg0, i64 1

0 commit comments

Comments
 (0)