
Commit 521f522

Address comments.

Leon Clark committed · 1 parent feefba6 · commit 521f522

3 files changed: +140 −57 lines

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 14 deletions
@@ -8745,28 +8745,37 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, const SDLoc &DL,
   return LowerShift(Res, Subtarget, DAG);
 }
 
+static bool isShuffleFoldableLoad(SDValue);
+
 /// Attempt to lower a BUILD_VECTOR of scalar values to a shuffle of splats
 /// representing a blend.
 static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
                                        X86Subtarget const &Subtarget,
                                        SelectionDAG &DAG) {
   MVT VT = BVOp->getSimpleValueType(0u);
-  auto const NumElems = VT.getVectorNumElements();
 
-  if (VT == MVT::v4f64) {
-    // Collect unique operands.
-    auto UniqueOps = SmallSet<SDValue, 16u>();
-    for (SDValue Op : BVOp->ops()) {
-      if (isIntOrFPConstant(Op) || Op.isUndef())
-        return SDValue();
-      UniqueOps.insert(Op);
-    }
-    // Candidate BUILD_VECTOR must have 2 unique operands.
-    if (UniqueOps.size() != 2u)
+  if (VT != MVT::v4f64)
+    return SDValue();
+
+  // Collect unique operands.
+  auto UniqueOps = SmallSet<SDValue, 16u>();
+  for (SDValue Op : BVOp->ops()) {
+    if (isIntOrFPConstant(Op) || Op.isUndef())
       return SDValue();
+    UniqueOps.insert(Op);
+  }
+
+  // Candidate BUILD_VECTOR must have 2 unique operands.
+  if (UniqueOps.size() != 2u)
+    return SDValue();
+
+  SDValue Op0 = *(UniqueOps.begin());
+  SDValue Op1 = *(++UniqueOps.begin());
+
+  if (isShuffleFoldableLoad(Op0) || isShuffleFoldableLoad(Op1) ||
+      Subtarget.hasAVX2()) {
     // Create shuffle mask.
-    SDValue Op0 = *(UniqueOps.begin());
-    SDValue Op1 = *(++UniqueOps.begin());
+    auto const NumElems = VT.getVectorNumElements();
     SmallVector<int, 16u> Mask(NumElems);
     for (auto I = 0u; I < NumElems; ++I) {
       SDValue Op = BVOp->getOperand(I);
@@ -8778,7 +8787,7 @@ static SDValue lowerBuildVectorAsBlend(BuildVectorSDNode *BVOp, SDLoc const &DL,
     return DAG.getVectorShuffle(VT, DL, NewOp0, NewOp1, Mask);
   }
 
-  return {};
+  return SDValue();
 }
 
 /// Create a vector constant without a load. SSE/AVX provide the bare minimum
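
Note: the restructured function above now bails out early for any type other than v4f64 and forms the blend-of-splats only when one of the two unique operands is a shuffle-foldable load or the target has AVX2. A minimal standalone sketch of the mask construction, with plain chars standing in for SDValues (buildBlendMask and the driver are illustrative, not LLVM API):

// Model of the mask built above: element I selects lane I of the Op0
// splat, or lane I + NumElems of the Op1 splat (LLVM shuffle masks
// index the second input vector at an offset of NumElems).
#include <cstdio>
#include <vector>

static std::vector<int> buildBlendMask(const std::vector<char> &Ops, char Op0) {
  const int NumElems = static_cast<int>(Ops.size());
  std::vector<int> Mask(NumElems);
  for (int I = 0; I < NumElems; ++I)
    Mask[I] = (Ops[I] == Op0) ? I : I + NumElems;
  return Mask;
}

int main() {
  // BUILD_VECTOR <A, B, B, A> against splats of A and B gives mask <0, 5, 6, 3>.
  for (int M : buildBlendMask({'A', 'B', 'B', 'A'}, 'A'))
    std::printf("%d ", M);
  std::printf("\n");
}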

llvm/test/CodeGen/X86/build-vector-256.ll

Lines changed: 51 additions & 25 deletions
@@ -415,22 +415,28 @@ define <32 x i8> @test_buildvector_v32i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; build vectors of repeated elements
 
 define <4 x double> @test_buildvector_4f64_2_var(double %a0, double %a1) {
-; AVX-32-LABEL: test_buildvector_4f64_2_var:
-; AVX-32: # %bb.0:
-; AVX-32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
-; AVX-32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm1
-; AVX-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; AVX-32-NEXT: retl
+; AVX1-32-LABEL: test_buildvector_4f64_2_var:
+; AVX1-32: # %bb.0:
+; AVX1-32-NEXT: vmovups {{[0-9]+}}(%esp), %xmm0
+; AVX1-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
+; AVX1-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-32-NEXT: retl
 ;
 ; AVX1-64-LABEL: test_buildvector_4f64_2_var:
 ; AVX1-64: # %bb.0:
-; AVX1-64-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
-; AVX1-64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
-; AVX1-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; AVX1-64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX1-64-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
+; AVX1-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-64-NEXT: retq
 ;
+; AVX2-32-LABEL: test_buildvector_4f64_2_var:
+; AVX2-32: # %bb.0:
+; AVX2-32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; AVX2-32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm1
+; AVX2-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-32-NEXT: retl
+;
 ; AVX2-64-LABEL: test_buildvector_4f64_2_var:
 ; AVX2-64: # %bb.0:
 ; AVX2-64-NEXT: vbroadcastsd %xmm1, %ymm1
@@ -445,21 +451,41 @@ define <4 x double> @test_buildvector_4f64_2_var(double %a0, double %a1) {
 }
 
 define <4 x double> @test_buildvector_4f64_2_load(ptr %p0, ptr %p1) {
-; AVX-32-LABEL: test_buildvector_4f64_2_load:
-; AVX-32: # %bb.0:
-; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX-32-NEXT: vbroadcastsd (%ecx), %ymm0
-; AVX-32-NEXT: vbroadcastsd (%eax), %ymm1
-; AVX-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; AVX-32-NEXT: retl
+; AVX1-32-LABEL: test_buildvector_4f64_2_load:
+; AVX1-32: # %bb.0:
+; AVX1-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX1-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX1-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-32-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-32-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-32-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-32-NEXT: retl
 ;
-; AVX-64-LABEL: test_buildvector_4f64_2_load:
-; AVX-64: # %bb.0:
-; AVX-64-NEXT: vbroadcastsd (%rsi), %ymm0
-; AVX-64-NEXT: vbroadcastsd (%rdi), %ymm1
-; AVX-64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; AVX-64-NEXT: retq
+; AVX1-64-LABEL: test_buildvector_4f64_2_load:
+; AVX1-64: # %bb.0:
+; AVX1-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-64-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-64-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-64-NEXT: retq
+;
+; AVX2-32-LABEL: test_buildvector_4f64_2_load:
+; AVX2-32: # %bb.0:
+; AVX2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; AVX2-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; AVX2-32-NEXT: vbroadcastsd (%ecx), %ymm0
+; AVX2-32-NEXT: vbroadcastsd (%eax), %ymm1
+; AVX2-32-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-32-NEXT: retl
+;
+; AVX2-64-LABEL: test_buildvector_4f64_2_load:
+; AVX2-64: # %bb.0:
+; AVX2-64-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-64-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-64-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-64-NEXT: retq
   %a0 = load double, ptr %p0
   %a1 = load double, ptr %p1
   %v0 = insertelement <4 x double> poison, double %a0, i32 0
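
Note: on AVX1 the build vector <a0, a1, a1, a0> is now assembled from two 128-bit halves instead of blending two 256-bit broadcasts. A small model of that dataflow, standing in for the vmovlhps/vinsertf128 steps in the checks above (variable names are illustrative):

// Each vmovlhps packs two scalars into a 128-bit pair; vinsertf128 $1
// concatenates the two pairs into one 256-bit value.
#include <array>
#include <cstdio>

int main() {
  double a0 = 1.0, a1 = 2.0;
  std::array<double, 2> Lo = {a0, a1};                       // vmovlhps: {a0, a1}
  std::array<double, 2> Hi = {a1, a0};                       // vmovlhps: {a1, a0}
  std::array<double, 4> Ymm = {Lo[0], Lo[1], Hi[0], Hi[1]};  // vinsertf128 $1
  for (double D : Ymm)
    std::printf("%g ", D);  // prints: 1 2 2 1
  std::printf("\n");
}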

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 66 additions & 18 deletions
@@ -2368,25 +2368,57 @@ define <4 x double> @unpckh_v4f64(<4 x double> %x, <4 x double> %y) {
 }
 
 define <4 x double> @blend_broadcasts_v1f64(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %blend = shufflevector <1 x double> %ld0, <1 x double> %ld1, <4 x i32> <i32 0, i32 1, i32 1, i32 0>
   ret <4 x double> %blend
 }
 
 define <4 x double> @blend_broadcasts_v1f64_4x(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64_4x:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64_4x:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64_4x:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64_4x:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %bcst0 = shufflevector <1 x double> %ld0, <1 x double> poison, <4 x i32> zeroinitializer
@@ -2396,12 +2428,28 @@ define <4 x double> @blend_broadcasts_v1f64_4x(ptr %p0, ptr %p1) {
 }
 
 define <4 x double> @blend_broadcasts_v1f64_2x(ptr %p0, ptr %p1) {
-; ALL-LABEL: blend_broadcasts_v1f64_2x:
-; ALL: # %bb.0:
-; ALL-NEXT: vbroadcastsd (%rsi), %ymm0
-; ALL-NEXT: vbroadcastsd (%rdi), %ymm1
-; ALL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
-; ALL-NEXT: retq
+; AVX1-LABEL: blend_broadcasts_v1f64_2x:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm2 = xmm1[0],xmm0[0]
+; AVX1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: blend_broadcasts_v1f64_2x:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX2-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX2-NEXT: retq
+;
+; AVX512VL-LABEL: blend_broadcasts_v1f64_2x:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vbroadcastsd (%rsi), %ymm0
+; AVX512VL-NEXT: vbroadcastsd (%rdi), %ymm1
+; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
+; AVX512VL-NEXT: retq
   %ld0 = load <1 x double>, ptr %p0, align 32
   %ld1 = load <1 x double>, ptr %p1, align 32
   %bcst0 = shufflevector <1 x double> %ld0, <1 x double> poison, <2 x i32> zeroinitializer
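
Note: the AVX2 and AVX512VL runs keep the broadcast-plus-blend sequence because AVX512VL implies AVX2 in the X86 subtarget, so the Subtarget.hasAVX2() gate in the new lowering holds for both. A toy sketch of that nesting (the enum is illustrative, not the real feature representation):

// Nested feature levels: each level includes everything below it, so an
// AVX512VL target also reports AVX2.
enum Level { AVX1 = 1, AVX2 = 2, AVX512VL = 3 };

static bool hasAVX2(Level L) { return L >= AVX2; }

int main() { return hasAVX2(AVX512VL) ? 0 : 1; }  // exits 0: AVX512VL has AVX2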
