Skip to content

Commit 987b59a

Browse files
committed
[X86] SimplifyDemandedVectorEltsForTargetNode - use EVT for F16C nodes
As we allow these nodes to be created pre-legalization, we can't rely on them having a simple VT. Fixes llvm#95278.
1 parent 25af23f commit 987b59a

File tree

2 files changed

+23
-1
lines changed

2 files changed

+23
-1
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41934,7 +41934,7 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
4193441934
case X86ISD::CVTPH2PS:
4193541935
case X86ISD::CVTPS2PH: {
4193641936
SDValue Src = Op.getOperand(0);
41937-
MVT SrcVT = Src.getSimpleValueType();
41937+
EVT SrcVT = Src.getValueType();
4193841938
APInt SrcUndef, SrcZero;
4193941939
APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
4194041940
if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,

llvm/test/CodeGen/X86/pr95278.ll

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=x86_64-- -mcpu=skylake-avx512 | FileCheck %s
3+
4+
define void @PR95278(ptr %p0, ptr %p1) {
5+
; CHECK-LABEL: PR95278:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: vcvtph2ps 2016(%rdi), %zmm0
8+
; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
9+
; CHECK-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
10+
; CHECK-NEXT: vcvtps2ph $4, %xmm0, %xmm0
11+
; CHECK-NEXT: vmovd %xmm0, %eax
12+
; CHECK-NEXT: movw %ax, (%rsi)
13+
; CHECK-NEXT: vzeroupper
14+
; CHECK-NEXT: retq
15+
%load = load <1024 x half>, ptr %p0, align 2
16+
%ext = fpext <1024 x half> %load to <1024 x float>
17+
%shuffle = shufflevector <1024 x float> %ext, <1024 x float> poison, <1 x i32> <i32 1022>
18+
%elt = extractelement <1 x float> %shuffle, i64 0
19+
%trunc = fptrunc float %elt to half
20+
store half %trunc, ptr %p1, align 2
21+
ret void
22+
}

0 commit comments

Comments
 (0)