Skip to content

Commit f82902f

Browse files
authored
[X86] Fold (f16 bitcast(extract_vectorelt(v,0))) to (extract_vectorelt(v8f16 bitcast(v),0)) (#125877)
Also looks through a possible truncation from i32 to i16 between the extraction and the bitcast. Cleans up some of the poor codegen identified in #98630.
1 parent 3115278 commit f82902f

14 files changed

353 additions and 551 deletions (lines changed)

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45160,6 +45160,19 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
4516045160
}
4516145161
}
4516245162

45163+
// Attempt to peek through f16 bitcasted extractions hidden by truncation.
45164+
if (VT == MVT::f16 && SrcVT == MVT::i16) {
45165+
SDValue Src = peekThroughTruncates(N0);
45166+
if (Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
45167+
Src.getOperand(0).getValueSizeInBits() == 128 &&
45168+
isNullConstant(Src.getOperand(1))) {
45169+
SDLoc DL(N);
45170+
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
45171+
DAG.getBitcast(MVT::v8f16, Src.getOperand(0)),
45172+
DAG.getVectorIdxConstant(0, DL));
45173+
}
45174+
}
45175+
4516345176
// Since MMX types are special and don't usually play with other vector types,
4516445177
// it's better to handle them early to be sure we emit efficient code by
4516545178
// avoiding store-load conversions.

llvm/test/CodeGen/X86/bfloat.ll

Lines changed: 0 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -82,8 +82,6 @@ define bfloat @add2(bfloat %a, bfloat %b) nounwind {
8282
; X86-NEXT: vmovd %eax, %xmm1
8383
; X86-NEXT: vaddss %xmm0, %xmm1, %xmm0
8484
; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0
85-
; X86-NEXT: vmovw %xmm0, %eax
86-
; X86-NEXT: vmovw %eax, %xmm0
8785
; X86-NEXT: retl
8886
;
8987
; SSE2-LABEL: add2:
@@ -110,8 +108,6 @@ define bfloat @add2(bfloat %a, bfloat %b) nounwind {
110108
; FP16-NEXT: vmovd %eax, %xmm1
111109
; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0
112110
; FP16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
113-
; FP16-NEXT: vmovw %xmm0, %eax
114-
; FP16-NEXT: vmovw %eax, %xmm0
115111
; FP16-NEXT: retq
116112
;
117113
; AVXNC-LABEL: add2:
@@ -124,8 +120,6 @@ define bfloat @add2(bfloat %a, bfloat %b) nounwind {
124120
; AVXNC-NEXT: vmovd %eax, %xmm1
125121
; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0
126122
; AVXNC-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
127-
; AVXNC-NEXT: vmovd %xmm0, %eax
128-
; AVXNC-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
129123
; AVXNC-NEXT: retq
130124
%add = fadd bfloat %a, %b
131125
ret bfloat %add
@@ -432,8 +426,6 @@ define bfloat @add_constant2(bfloat %a) nounwind {
432426
; X86-NEXT: vmovd %eax, %xmm0
433427
; X86-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
434428
; X86-NEXT: vcvtneps2bf16 %xmm0, %xmm0
435-
; X86-NEXT: vmovw %xmm0, %eax
436-
; X86-NEXT: vmovw %eax, %xmm0
437429
; X86-NEXT: retl
438430
;
439431
; SSE2-LABEL: add_constant2:
@@ -454,8 +446,6 @@ define bfloat @add_constant2(bfloat %a) nounwind {
454446
; FP16-NEXT: vmovd %eax, %xmm0
455447
; FP16-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
456448
; FP16-NEXT: vcvtneps2bf16 %xmm0, %xmm0
457-
; FP16-NEXT: vmovw %xmm0, %eax
458-
; FP16-NEXT: vmovw %eax, %xmm0
459449
; FP16-NEXT: retq
460450
;
461451
; AVXNC-LABEL: add_constant2:
@@ -465,8 +455,6 @@ define bfloat @add_constant2(bfloat %a) nounwind {
465455
; AVXNC-NEXT: vmovd %eax, %xmm0
466456
; AVXNC-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
467457
; AVXNC-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0
468-
; AVXNC-NEXT: vmovd %xmm0, %eax
469-
; AVXNC-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
470458
; AVXNC-NEXT: retq
471459
%add = fadd bfloat %a, 1.0
472460
ret bfloat %add

llvm/test/CodeGen/X86/canonicalize-vars-f16-type.ll

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -154,8 +154,6 @@ define half @complex_canonicalize_fmul_half(half %a, half %b) nounwind {
154154
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
155155
; AVX512-NEXT: vsubss %xmm1, %xmm0, %xmm0
156156
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
157-
; AVX512-NEXT: vmovd %xmm0, %eax
158-
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
159157
; AVX512-NEXT: retq
160158
entry:
161159

@@ -239,15 +237,11 @@ define void @v_test_canonicalize_v2half(<2 x half> addrspace(1)* %out) nounwind
239237
; AVX512-NEXT: vxorps %xmm3, %xmm3, %xmm3
240238
; AVX512-NEXT: vblendps {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3]
241239
; AVX512-NEXT: vcvtps2ph $4, %xmm2, %xmm2
242-
; AVX512-NEXT: vmovd %xmm2, %eax
243-
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2
244240
; AVX512-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
245241
; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
246242
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
247243
; AVX512-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3]
248244
; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
249-
; AVX512-NEXT: vmovd %xmm0, %eax
250-
; AVX512-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
251245
; AVX512-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
252246
; AVX512-NEXT: vmovd %xmm0, (%rdi)
253247
; AVX512-NEXT: retq

0 commit comments

Comments
 (0)