Skip to content

Commit 46a7f4d

Browse files
committed
[SVE][CodeGen] Fix bug in DAGCombiner::reduceBuildVecToShuffle
When trying to reduce a BUILD_VECTOR to a SHUFFLE_VECTOR it's important that we carefully check the vector types that led to that BUILD_VECTOR. In the test I have attached to this commit there is a case where the results of two SVE faddv instructions are being stored to consecutive memory locations. With my fix, as part of merging those stores we discover that each BUILD_VECTOR element came from an extract of an element of an SVE (scalable) vector, and we therefore bail out. Differential Revision: https://reviews.llvm.org/D82564
1 parent 6b9a706 commit 46a7f4d

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18287,6 +18287,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
1828718287
return SDValue();
1828818288
SDValue ExtractedFromVec = Op.getOperand(0);
1828918289

18290+
if (ExtractedFromVec.getValueType().isScalableVector())
18291+
return SDValue();
18292+
1829018293
const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
1829118294
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
1829218295
return SDValue();
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
2+
3+
%complex = type { { double, double } }
4+
5+
; Function Attrs: argmemonly nounwind readonly
6+
declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double>, i32 immarg) #3
7+
8+
; Function Attrs: argmemonly nounwind readonly
9+
declare <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1>, double*) #3
10+
11+
; Function Attrs: nounwind readnone
12+
declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2
13+
14+
; Regression test for DAGCombiner::reduceBuildVecToShuffle: two faddv results
; are stored to the two adjacent doubles of %outval. Per the commit
; description, merging those stores yields a BUILD_VECTOR whose elements are
; extracted from SVE vectors, and the combine must bail out on it.
define void @foo1(%complex* %outval, <vscale x 2 x i1> %pred, double *%inptr) {
15+
; CHECK-LABEL: foo1:
16+
; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1]
17+
; CHECK-NEXT: faddv d2, p0, z0.d
18+
; CHECK-NEXT: faddv d0, p0, z1.d
19+
; CHECK-NEXT: mov v2.d[1], v0.d[0]
20+
; CHECK-NEXT: str q2, [x0]
21+
; Pointers to the first and second double inside the %complex struct.
%realp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 0
22+
%imagp = getelementptr inbounds %complex, %complex* %outval, i64 0, i32 0, i32 1
23+
; Call the ld2 intrinsic under predicate %pred, then take tuple elements 0 and
; 1 of its <vscale x 4 x double> result and reduce each one with faddv.
%1 = call <vscale x 4 x double> @llvm.aarch64.sve.ld2.nxv4f64.nxv2i1(<vscale x 2 x i1> %pred, double* nonnull %inptr)
24+
%2 = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %1, i32 0)
25+
%3 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %2)
26+
%4 = call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %1, i32 1)
27+
%5 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %4)
28+
; Store both scalar reductions to the adjacent fields; the CHECK lines above
; expect these two stores to be emitted as a single `str q2, [x0]`.
store double %3, double* %realp, align 8
29+
store double %5, double* %imagp, align 8
30+
ret void
31+
}
32+

0 commit comments

Comments
 (0)