Skip to content

Commit b96d0b4

Browse files
committed
Eliminate sub_ss, sub_sd from broadcast patterns.
The (COPY_TO_REGCLASS GR32:$src, VR128) pattern looks odd, but copyPhysReg does the right thing with it. (The old pattern would eventually produce the same cross-class copy.) llvm-svn: 160830
1 parent 8d3e824 commit b96d0b4

File tree

1 file changed

+16
-42
lines changed

1 file changed

+16
-42
lines changed

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 16 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -7704,24 +7704,18 @@ let Predicates = [HasAVX2] in {
77047704
// is used by additional users, which prevents the pattern selection.
77057705
let AddedComplexity = 20 in {
77067706
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
7707-
(VBROADCASTSSrr
7708-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
7707+
(VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
77097708
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
7710-
(VBROADCASTSSYrr
7711-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss))>;
7709+
(VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
77127710
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
7713-
(VBROADCASTSDYrr
7714-
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd))>;
7711+
(VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
77157712

77167713
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
7717-
(VBROADCASTSSrr
7718-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
7714+
(VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
77197715
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
7720-
(VBROADCASTSSYrr
7721-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss))>;
7716+
(VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
77227717
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
7723-
(VBROADCASTSDYrr
7724-
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd))>;
7718+
(VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
77257719
}
77267720
}
77277721

@@ -7745,46 +7739,26 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
77457739
let AddedComplexity = 20 in {
77467740
// 128bit broadcasts:
77477741
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
7748-
(VPSHUFDri
7749-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0)>;
7742+
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
77507743
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
77517744
(VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
7752-
(VPSHUFDri
7753-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss), 0),
7754-
sub_xmm),
7755-
(VPSHUFDri
7756-
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss),
7757-
0), 1)>;
7745+
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
7746+
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
77587747
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
77597748
(VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
7760-
(VPSHUFDri
7761-
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
7762-
0x44),
7763-
sub_xmm),
7764-
(VPSHUFDri
7765-
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd),
7766-
0x44), 1)>;
7749+
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
7750+
(VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;
77677751

77687752
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
7769-
(VPSHUFDri
7770-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0)>;
7753+
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
77717754
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
77727755
(VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
7773-
(VPSHUFDri
7774-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss), 0),
7775-
sub_xmm),
7776-
(VPSHUFDri
7777-
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), GR32:$src, sub_ss),
7778-
0), 1)>;
7756+
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
7757+
(VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
77797758
def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
77807759
(VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
7781-
(VPSHUFDri
7782-
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
7783-
0x44),
7784-
sub_xmm),
7785-
(VPSHUFDri
7786-
(INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src, sub_sd),
7787-
0x44), 1)>;
7760+
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
7761+
(VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
77887762
}
77897763
}
77907764

0 commit comments

Comments
 (0)