@@ -2431,6 +2431,47 @@ def DblwdCmp {
                                (v2i64 (XXSPLTW EQWSHAND, 2)), 0));
}

+ class SplatAndAssignIndexed<
+     SDPatternOperator op,
+     int Total, dag splat,
+     int Index, dag assign> {
+   defvar head = !listsplat(splat, Index);
+   defvar x = [assign];
+   defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
+   list<dag> Ops = !listconcat(head, x, tail);
+   dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
+ }
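+ // For instance, SplatAndAssignIndexed<build_vector, 4, (i32 0), 2,
+ //                                     (i32 (load ForceXForm:$src))>.DAG
+ // folds to
+ // (build_vector (i32 0), (i32 0), (i32 (load ForceXForm:$src)), (i32 0)).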
+
+ class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     2, (f64 fpimm0),
+     Index, (f64 (extloadf32 ForceXForm:$src))>;
+
+ class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     2, (i64 0),
+     Index, (i64 (zextloadi32 ForceXForm:$src))>;
+
+ class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     4, (i32 0),
+     Index, (i32 (load ForceXForm:$src))>;
+
+ class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     4, (f32 fpimm0),
+     Index, (f32 (load ForceXForm:$src))>;
+
+ class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     2, (f64 fpimm0),
+     Index, (f64 (load ForceXForm:$src))>;
+
+ class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
+     build_vector,
+     2, (i64 0),
+     Index, (i64 (load ForceXForm:$src))>;
+
//---------------------------- Anonymous Patterns ----------------------------//
// Predicate combinations are kept in roughly chronological order in terms of
// instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
            (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                   sub_64), ForceXForm:$src)>;
}
+
+ // BUILD_VECTOR via single load and zeros.
+ // Extension load.
+ def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+           (v2f64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+ def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+           (v2i64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
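+ // XXPERMDIs with immediate 2 swaps the two doublewords of the VSR, moving
+ // the loaded value from doubleword 0 into element 1 for the <1> patterns.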
+
+ // Normal load.
+ foreach Index = !range(4) in {
+   defvar Temp = !sub(5, Index);
+   defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
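+   // Offset is (1 - Index) mod 4: LXSIWZX leaves the zero-extended word in
+   // big-endian word element 1, and the XXSLDWIs rotation below moves it into
+   // element Index while keeping zeros elsewhere.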
+   if !ne(Offset, 0) then {
+     def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+               (v4i32 (XXSLDWIs
+                         (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                         Offset))>;
+     def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+               (v4f32 (XXSLDWIs
+                         (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                         Offset))>;
+   } else {
+     def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+               (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+     def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+               (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+   }
+ }
+
+ def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+           (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+           (v2f64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+ def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+           (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+           (v2i64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64

// Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
            (STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
                                   sub_64), ForceXForm:$src)>;
}
+
+ // BUILD_VECTOR via single load and zeros.
+ // Extension load.
+ def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
+           (v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
+           (v2f64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
+
+ def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
+           (v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
+           (v2i64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
+
+ // Normal load.
+ foreach Index = !range(4) in {
+   defvar Temp = !sub(!add(Index, 4), 2);
+   defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
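+   // Offset is (Index + 2) mod 4: LXSIWZX leaves the zero-extended word in
+   // little-endian element 2, and the XXSLDWIs rotation below moves it into
+   // element Index while keeping zeros elsewhere.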
+   if !ne(Offset, 0) then {
+     def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+               (v4i32 (XXSLDWIs
+                         (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                         Offset))>;
+     def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+               (v4f32 (XXSLDWIs
+                         (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
+                         Offset))>;
+   } else {
+     def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
+               (v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+     def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
+               (v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
+   }
+ }
+
+ def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
+           (v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
+           (v2f64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
+ def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
+           (v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
+ def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
+           (v2i64 (XXPERMDIs
+                     (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
+
} // HasVSX, HasP8Vector, IsLittleEndian

// Big endian pre-Power9 VSX subtarget.