Skip to content

Commit fe5c6b5

Browse files
author
Kai Luo
committed
Optimize BUILD_VECTOR
1 parent effaf41 commit fe5c6b5

File tree

3 files changed

+202
-227
lines changed

3 files changed

+202
-227
lines changed

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2431,6 +2431,47 @@ def DblwdCmp {
24312431
(v2i64 (XXSPLTW EQWSHAND, 2)), 0));
24322432
}
24332433

2434+
// Helper that builds the operand list and DAG for an `op` node with `Total`
// operands, all equal to `splat` except the operand at 0-based position
// `Index`, which is `assign`.
//   op     - operator placed at the head of the resulting DAG.
//   Total  - total number of operands in the resulting DAG.
//   splat  - DAG repeated in every position other than `Index`.
//   Index  - position of `assign`. `tail` is sized as Total - Index - 1, so
//            callers are expected to keep 0 <= Index < Total.
//   assign - DAG placed at position `Index`.
// Fields:
//   Ops - flat operand list: `Index` copies of `splat`, then `assign`, then
//         the remaining copies of `splat`.
//   DAG - (op Ops[0], Ops[1], ..., Ops[Total-1]).
class SplatAndAssignIndexed<
2435+
SDPatternOperator op,
2436+
int Total, dag splat,
2437+
int Index, dag assign> {
2438+
defvar head = !listsplat(splat, Index);
2439+
defvar x = [assign];
2440+
defvar tail = !listsplat(splat, !sub(!sub(Total, Index), 1));
2441+
list<dag> Ops = !listconcat(head, x, tail);
2442+
// Start from the operand-less DAG `(op)` and append one operand per element
// of Ops; !con concatenates two DAGs that share the same operator.
dag DAG = !foldl((op), Ops, a, b, !con(a, (op b)));
2443+
}
2444+
2445+
// Two-operand build_vector: (f64 fpimm0) in every position except `Index`,
// which holds (f64 (extloadf32 ForceXForm:$src)). The matched pattern is
// available through the inherited `DAG` field.
// NOTE(review): fpimm0 and extloadf32 are defined outside this view; by name
// they are presumably the FP zero immediate and an f32 load extended to f64.
class BVExtLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
2446+
build_vector,
2447+
2, (f64 fpimm0),
2448+
Index, (f64 (extloadf32 ForceXForm:$src))>;
2449+
2450+
// Two-operand build_vector: (i64 0) in every position except `Index`, which
// holds (i64 (zextloadi32 ForceXForm:$src)). The matched pattern is available
// through the inherited `DAG` field.
// NOTE(review): zextloadi32 is defined outside this view; by name it is
// presumably an i32 load zero-extended to i64.
class BVZExtLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
2451+
build_vector,
2452+
2, (i64 0),
2453+
Index, (i64 (zextloadi32 ForceXForm:$src))>;
2454+
2455+
// Four-operand build_vector: (i32 0) in every position except `Index`, which
// holds (i32 (load ForceXForm:$src)). The matched pattern is available
// through the inherited `DAG` field.
class BVLoadAndZerosInt<int Index> : SplatAndAssignIndexed<
2456+
build_vector,
2457+
4, (i32 0),
2458+
Index, (i32 (load ForceXForm:$src))>;
2459+
2460+
// Four-operand build_vector: (f32 fpimm0) in every position except `Index`,
// which holds (f32 (load ForceXForm:$src)). The matched pattern is available
// through the inherited `DAG` field.
// NOTE(review): fpimm0 is defined outside this view; by name it is presumably
// the FP zero immediate.
class BVLoadAndZerosFP<int Index> : SplatAndAssignIndexed<
2461+
build_vector,
2462+
4, (f32 fpimm0),
2463+
Index, (f32 (load ForceXForm:$src))>;
2464+
2465+
// Two-operand build_vector: (f64 fpimm0) in every position except `Index`,
// which holds (f64 (load ForceXForm:$src)). The matched pattern is available
// through the inherited `DAG` field.
// NOTE(review): fpimm0 is defined outside this view; by name it is presumably
// the FP zero immediate.
class BVLoadAndZerosDbl<int Index> : SplatAndAssignIndexed<
2466+
build_vector,
2467+
2, (f64 fpimm0),
2468+
Index, (f64 (load ForceXForm:$src))>;
2469+
2470+
// Two-operand build_vector: (i64 0) in every position except `Index`, which
// holds (i64 (load ForceXForm:$src)). The matched pattern is available
// through the inherited `DAG` field.
class BVLoadAndZerosLong<int Index> : SplatAndAssignIndexed<
2471+
build_vector,
2472+
2, (i64 0),
2473+
Index, (i64 (load ForceXForm:$src))>;
2474+
24342475
//---------------------------- Anonymous Patterns ----------------------------//
24352476
// Predicate combinations are kept in roughly chronological order in terms of
24362477
// instruction availability in the architecture. For example, VSX came in with
@@ -3449,6 +3490,53 @@ foreach Idx = [ [0,3], [2,1], [3,2] ] in {
34493490
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
34503491
sub_64), ForceXForm:$src)>;
34513492
}
3493+
3494+
// BUILD_VECTOR via single load and zeros.
3495+
// Extension load.
3496+
// Two-element build_vector whose only non-zero operand is an extending load
// (LXSSPX for v2f64, LXSIWZX for v2i64). With the load in operand 0 the
// scalar load is simply copied into VSRC; with the load in operand 1 the same
// value is additionally passed through XXPERMDIs with immediate 2.
// NOTE(review): this presumably relies on the scalar load leaving its result
// in doubleword 0 with the other doubleword zeroed, and on `XXPERMDIs x, 2`
// swapping the two doublewords -- confirm against the Power ISA for every
// subtarget these patterns are enabled on.
def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
3497+
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
3498+
def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
3499+
(v2f64 (XXPERMDIs
3500+
(COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
3501+
3502+
def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
3503+
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3504+
def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
3505+
(v2i64 (XXPERMDIs
3506+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
3507+
3508+
// Normal load.
3509+
// Four-element build_vector (v4i32 and v4f32) with a single loaded 32-bit
// operand at position `Index` and zeros elsewhere. Both element types select
// to LXSIWZX copied into VSRC; `Index` runs over 0..3.
// NOTE(review): this presumably relies on LXSIWZX leaving the loaded word in
// word element 1 with all other words zeroed, so that a word shift moves it
// into position `Index` -- confirm against the Power ISA.
foreach Index = !range(4) in {
3510+
defvar Temp = !sub(5, Index);
3511+
// Offset = (5 - Index) mod 4, i.e. 1, 0, 3, 2 for Index = 0, 1, 2, 3. It is
// used as the XXSLDWIs immediate below.
defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
3512+
// A zero Offset (Index == 1) needs no shift, so the load is used directly.
if !ne(Offset, 0) then {
3513+
def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
3514+
(v4i32 (XXSLDWIs
3515+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
3516+
Offset))>;
3517+
def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
3518+
(v4f32 (XXSLDWIs
3519+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
3520+
Offset))>;
3521+
} else {
3522+
def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
3523+
(v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3524+
def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
3525+
(v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3526+
}
3527+
}
3528+
3529+
// Two-element build_vector (v2f64 and v2i64) with a single full-width loaded
// operand and a zero in the other position. With the load in operand 0 the
// LXSDX result is simply copied into VSRC; with the load in operand 1 the
// same value is additionally passed through XXPERMDIs with immediate 2.
// NOTE(review): this presumably relies on LXSDX zeroing the doubleword it
// does not load, and on `XXPERMDIs x, 2` swapping the two doublewords --
// confirm against the Power ISA.
def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
3530+
(v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
3531+
def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
3532+
(v2f64 (XXPERMDIs
3533+
(COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
3534+
3535+
def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
3536+
(v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
3537+
def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
3538+
(v2i64 (XXPERMDIs
3539+
(COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
34523540
} // HasVSX, HasP8Vector, IsBigEndian, IsPPC64
34533541

34543542
// Little endian Power8 VSX subtarget.
@@ -3542,6 +3630,54 @@ foreach Idx = [ [0,2], [1,1], [3,3] ] in {
35423630
(STIWX (EXTRACT_SUBREG (XXSLDWI $A, $A, !head(!tail(Idx))),
35433631
sub_64), ForceXForm:$src)>;
35443632
}
3633+
3634+
// BUILD_VECTOR via single load and zeros.
3635+
// Extension load.
3636+
// Two-element build_vector whose only non-zero operand is an extending load
// (LXSSPX for v2f64, LXSIWZX for v2i64). Here the load in operand 1 is the
// case that needs no permute; the load in operand 0 is passed through
// XXPERMDIs with immediate 2.
// NOTE(review): this presumably relies on the scalar load leaving its result
// in the doubleword that corresponds to element 1 on this subtarget, with the
// other doubleword zeroed, and on `XXPERMDIs x, 2` swapping the two
// doublewords -- confirm against the Power ISA.
def : Pat<(v2f64 BVExtLoadAndZerosFP<1>.DAG),
3637+
(v2f64 (COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC))>;
3638+
def : Pat<(v2f64 BVExtLoadAndZerosFP<0>.DAG),
3639+
(v2f64 (XXPERMDIs
3640+
(COPY_TO_REGCLASS (LXSSPX ForceXForm:$src), VSRC), 2))>;
3641+
3642+
def : Pat<(v2i64 BVZExtLoadAndZerosInt<1>.DAG),
3643+
(v2i64 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3644+
def : Pat<(v2i64 BVZExtLoadAndZerosInt<0>.DAG),
3645+
(v2i64 (XXPERMDIs
3646+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC), 2))>;
3647+
3648+
// Normal load.
3649+
// Four-element build_vector (v4i32 and v4f32) with a single loaded 32-bit
// operand at position `Index` and zeros elsewhere. Both element types select
// to LXSIWZX copied into VSRC; `Index` runs over 0..3.
// NOTE(review): this presumably relies on LXSIWZX leaving the loaded word in
// the lane that corresponds to element 2 on this subtarget, with all other
// words zeroed, so that a word shift moves it into position `Index` --
// confirm against the Power ISA.
foreach Index = !range(4) in {
3650+
defvar Temp = !sub(!add(Index, 4), 2);
3651+
// Offset = (Index + 2) mod 4, i.e. 2, 3, 0, 1 for Index = 0, 1, 2, 3. It is
// used as the XXSLDWIs immediate below.
defvar Offset = !if(!gt(Temp, 3), !sub(Temp, 4), Temp);
3652+
// A zero Offset (Index == 2) needs no shift, so the load is used directly.
if !ne(Offset, 0) then {
3653+
def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
3654+
(v4i32 (XXSLDWIs
3655+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
3656+
Offset))>;
3657+
def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
3658+
(v4f32 (XXSLDWIs
3659+
(COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC),
3660+
Offset))>;
3661+
} else {
3662+
def : Pat<(v4i32 BVLoadAndZerosInt<Index>.DAG),
3663+
(v4i32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3664+
def : Pat<(v4f32 BVLoadAndZerosFP<Index>.DAG),
3665+
(v4f32 (COPY_TO_REGCLASS (LXSIWZX ForceXForm:$src), VSRC))>;
3666+
}
3667+
}
3668+
3669+
// Two-element build_vector (v2f64 and v2i64) with a single full-width loaded
// operand and a zero in the other position. Here the load in operand 1 is the
// case that needs no permute; the load in operand 0 is passed through
// XXPERMDIs with immediate 2.
// NOTE(review): this presumably relies on LXSDX zeroing the doubleword it
// does not load, and on `XXPERMDIs x, 2` swapping the two doublewords --
// confirm against the Power ISA.
def : Pat<(v2f64 BVLoadAndZerosDbl<1>.DAG),
3670+
(v2f64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
3671+
def : Pat<(v2f64 BVLoadAndZerosDbl<0>.DAG),
3672+
(v2f64 (XXPERMDIs
3673+
(COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
3674+
3675+
def : Pat<(v2i64 BVLoadAndZerosLong<1>.DAG),
3676+
(v2i64 (COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC))>;
3677+
def : Pat<(v2i64 BVLoadAndZerosLong<0>.DAG),
3678+
(v2i64 (XXPERMDIs
3679+
(COPY_TO_REGCLASS (LXSDX ForceXForm:$src), VSRC), 2))>;
3680+
35453681
} // HasVSX, HasP8Vector, IsLittleEndian
35463682

35473683
// Big endian pre-Power9 VSX subtarget.

0 commit comments

Comments
 (0)