Skip to content

Commit a54c6fc

Browse files
committed
[X86] Exclude invalid element types for bitcast/broadcast folding.
It looks like the fold introduced in 63f3383 can cause crashes if the type of the bitcasted value is not a valid vector element type, such as x86_mmx. To resolve the crash, reject invalid vector element types. The way the check is done in this patch is a bit clunky; perhaps there is a better way to do it?

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D104792
1 parent 121ecb0 commit a54c6fc

File tree

3 files changed

+64
-7
lines changed

3 files changed

+64
-7
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37487,7 +37487,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
3748737487
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
3748837488
if (Src.getOpcode() == ISD::BITCAST &&
3748937489
SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() &&
37490-
DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
37490+
DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
37491+
FixedVectorType::isValidElementType(
37492+
BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) {
3749137493
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
3749237494
VT.getVectorNumElements());
3749337495
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));

llvm/test/CodeGen/X86/avx-vbroadcast.ll

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
2+
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X32
3+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X64
44

55
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
66
; X32-LABEL: A:
@@ -972,6 +972,30 @@ define float @broadcast_lifetime() nounwind {
972972
ret float %9
973973
}
974974

975+
define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
976+
; X32-LABEL: broadcast_x86_mmx:
977+
; X32: ## %bb.0: ## %bb
978+
; X32-NEXT: subl $12, %esp
979+
; X32-NEXT: movq %mm0, (%esp)
980+
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
981+
; X32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
982+
; X32-NEXT: addl $12, %esp
983+
; X32-NEXT: retl
984+
;
985+
; X64-LABEL: broadcast_x86_mmx:
986+
; X64: ## %bb.0: ## %bb
987+
; X64-NEXT: movdq2q %xmm0, %mm0
988+
; X64-NEXT: movq2dq %mm0, %xmm0
989+
; X64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
990+
; X64-NEXT: retq
991+
bb:
992+
%tmp1 = bitcast x86_mmx %tmp to i64
993+
%tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
994+
%tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
995+
%tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
996+
ret <8 x i16> %tmp4
997+
}
998+
975999
declare void @gfunc(<4 x float>*)
9761000
declare void @llvm.lifetime.start.p0i8(i64, i8*)
9771001
declare void @llvm.lifetime.end.p0i8(i64, i8*)

llvm/test/CodeGen/X86/avx2-vbroadcast.ll

Lines changed: 35 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
4-
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
5-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
2+
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2,+mmx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
3+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2,+mmx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
4+
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq,+mmx | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
5+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq,+mmx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
66

77
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
88
; X32-LABEL: BB16:
@@ -1448,3 +1448,34 @@ eintry:
14481448
store <4 x i64> %splat.splat, <4 x i64>* %__b.addr.i, align 16
14491449
ret void
14501450
}
1451+
1452+
define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
1453+
; X32-LABEL: broadcast_x86_mmx:
1454+
; X32: ## %bb.0: ## %bb
1455+
; X32-NEXT: subl $12, %esp
1456+
; X32-NEXT: movq %mm0, (%esp)
1457+
; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
1458+
; X32-NEXT: addl $12, %esp
1459+
; X32-NEXT: retl
1460+
;
1461+
; X64-AVX2-LABEL: broadcast_x86_mmx:
1462+
; X64-AVX2: ## %bb.0: ## %bb
1463+
; X64-AVX2-NEXT: movdq2q %xmm0, %mm0
1464+
; X64-AVX2-NEXT: movq %mm0, %rax
1465+
; X64-AVX2-NEXT: vmovq %rax, %xmm0
1466+
; X64-AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
1467+
; X64-AVX2-NEXT: retq
1468+
;
1469+
; X64-AVX512VL-LABEL: broadcast_x86_mmx:
1470+
; X64-AVX512VL: ## %bb.0: ## %bb
1471+
; X64-AVX512VL-NEXT: movdq2q %xmm0, %mm0
1472+
; X64-AVX512VL-NEXT: movq %mm0, %rax
1473+
; X64-AVX512VL-NEXT: vpbroadcastq %rax, %xmm0
1474+
; X64-AVX512VL-NEXT: retq
1475+
bb:
1476+
%tmp1 = bitcast x86_mmx %tmp to i64
1477+
%tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
1478+
%tmp3 = bitcast <2 x i64> %tmp2 to <8 x i16>
1479+
%tmp4 = shufflevector <8 x i16> %tmp3, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1480+
ret <8 x i16> %tmp4
1481+
}

0 commit comments

Comments
 (0)