Skip to content

Commit 70726ce

Browse files
committed
DAG: Combine extract_vector_elt of concat_vectors
Fixes regressions where an extra canonicalize was left over when legalizing vector fminnum/fmaxnum.
1 parent d4638cb commit 70726ce

File tree

2 files changed

+25
-17
lines changed

2 files changed

+25
-17
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17604,6 +17604,27 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
1760417604
Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
1760517605
Index = DAG.getConstant(Elt, DL, Index.getValueType());
1760617606
}
17607+
} else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS &&
17608+
!BCNumEltsChanged && VecVT.getVectorElementType() == ScalarVT) {
17609+
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
17610+
// -> extract_vector_elt a, 0
17611+
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
17612+
// -> extract_vector_elt a, 1
17613+
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
17614+
// -> extract_vector_elt b, 0
17615+
// extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
17616+
// -> extract_vector_elt b, 1
17617+
SDLoc SL(N);
17618+
EVT ConcatVT = VecOp.getOperand(0).getValueType();
17619+
unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
17620+
SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
17621+
Index.getValueType());
17622+
17623+
SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
17624+
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
17625+
ConcatVT.getVectorElementType(),
17626+
ConcatOp, NewIdx);
17627+
return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
1760717628
}
1760817629

1760917630
// Make sure we found a non-volatile load and the extractelement is

llvm/test/CodeGen/AMDGPU/reduction.ll

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -438,10 +438,8 @@ entry:
438438
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
439439
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
440440
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
441+
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
441442

442-
; FIXME: Extra canonicalize leftover
443-
; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
444-
; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]]
445443

446444
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
447445
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
@@ -466,11 +464,7 @@ entry:
466464
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
467465
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
468466
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
469-
470-
; FIXME: Extra canonicalize leftover
471-
; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
472-
; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]]
473-
467+
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
474468

475469
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
476470
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
@@ -507,10 +501,7 @@ entry:
507501
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
508502
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
509503
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
510-
511-
; FIXME: Extra canonicalize leftover
512-
; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
513-
; GFX9-NEXT: v_max_f16_e32 v0, [[MAX]], [[TMP]]
504+
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
514505

515506
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
516507
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
@@ -549,11 +540,7 @@ entry:
549540
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
550541
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
551542
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
552-
553-
; FIXME: Extra canonicalize leftover
554-
; GFX9-NEXT: v_max_f16_sdwa [[TMP:v[0-9]+]], [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
555-
; GFX9-NEXT: v_min_f16_e32 v0, [[MIN]], [[TMP]]
556-
543+
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
557544

558545
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
559546
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1

0 commit comments

Comments
 (0)