Skip to content

Commit 627463d

Browse files
authored
[X86][FP16] Limit combination of fp_round & concat to concat of 2 operands (#94302)
Add a check that the concat_vectors node has exactly 2 operands before applying this combine. This avoids a crash when concat_vectors has more than 2 operands and operands 0 and 1 are undef values.
1 parent 99b2581 commit 627463d

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57181,9 +57181,12 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
5718157181
SDValue Cvt, Chain;
5718257182
unsigned NumElts = VT.getVectorNumElements();
5718357183
if (Subtarget.hasFP16()) {
57184-
// Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
57185-
// into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
57186-
if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
57184+
// Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64),
57185+
// v4f32 (xint_to_fp v4i64))))
57186+
// into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64),
57187+
// v8f16 (CVTXI2P v4i64)))
57188+
if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS &&
57189+
Src.getNumOperands() == 2) {
5718757190
SDValue Cvt0, Cvt1;
5718857191
SDValue Op0 = Src.getOperand(0);
5718957192
SDValue Op1 = Src.getOperand(1);
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16 | FileCheck %s
4+
5+
define void @foo(<2 x float> %0) {
6+
; CHECK-LABEL: foo:
7+
; CHECK: # %bb.0: # %entry
8+
; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
9+
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
10+
; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
11+
; CHECK-NEXT: vcvtps2phx %ymm0, %xmm0
12+
; CHECK-NEXT: vmovlps %xmm0, 0
13+
; CHECK-NEXT: vzeroupper
14+
; CHECK-NEXT: retq
15+
entry:
16+
%1 = shufflevector <2 x float> zeroinitializer, <2 x float> %0, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 1, i32 2, i32 3>
17+
%2 = fptrunc <8 x float> %1 to <8 x half>
18+
%3 = bitcast <8 x half> %2 to <2 x i64>
19+
%4 = extractelement <2 x i64> %3, i64 0
20+
store i64 %4, ptr null, align 8
21+
ret void
22+
}

0 commit comments

Comments
 (0)