Skip to content

Commit d654ee9

Browse files
author
Thorsten Schütt
committed
anyext
1 parent dfcefca commit d654ee9

File tree

8 files changed

+123
-127
lines changed

8 files changed

+123
-127
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -45,14 +45,14 @@ bool CombinerHelper::matchMergeXAndUndef(const MachineInstr &MI,
4545
//
4646
// ->
4747
//
48-
// %0:_(s16) = G_ZEXT %bits_0_7:(s8)
48+
// %0:_(s16) = G_ANYEXT %bits_0_7:(s8)
4949
//
5050

51-
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
51+
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ANYEXT, {DstTy, SrcTy}}))
5252
return false;
5353

5454
MatchInfo = [=](MachineIRBuilder &B) {
55-
B.buildZExt(Dst, Merge->getSourceReg(0));
55+
B.buildAnyExt(Dst, Merge->getSourceReg(0));
5656
};
5757
return true;
5858
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,9 @@ name: test_combine_unmerge_merge
99
body: |
1010
bb.1:
1111
; CHECK-LABEL: name: test_combine_unmerge_merge
12-
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
13-
; CHECK-NEXT: $w0 = COPY [[C]](s32)
14-
; CHECK-NEXT: $w1 = COPY [[C]](s32)
12+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
13+
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
14+
; CHECK-NEXT: $w1 = COPY [[DEF]](s32)
1515
%0:_(s32) = G_IMPLICIT_DEF
1616
%1:_(s32) = G_IMPLICIT_DEF
1717
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -113,11 +113,9 @@ name: test_combine_unmerge_bitcast_merge
113113
body: |
114114
bb.1:
115115
; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge
116-
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
117-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[C]](s64)
118-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
119-
; CHECK-NEXT: $w0 = COPY [[UV]](s32)
120-
; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
116+
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
117+
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
118+
; CHECK-NEXT: $w1 = COPY [[DEF]](s32)
121119
%0:_(s32) = G_IMPLICIT_DEF
122120
%1:_(s32) = G_IMPLICIT_DEF
123121
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -135,11 +133,11 @@ name: test_combine_unmerge_merge_incompatible_types
135133
body: |
136134
bb.1:
137135
; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types
138-
; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
139-
; CHECK-NEXT: $h0 = COPY [[C]](s16)
140-
; CHECK-NEXT: $h1 = COPY [[C]](s16)
141-
; CHECK-NEXT: $h2 = COPY [[C]](s16)
142-
; CHECK-NEXT: $h3 = COPY [[C]](s16)
136+
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
137+
; CHECK-NEXT: $h0 = COPY [[DEF]](s16)
138+
; CHECK-NEXT: $h1 = COPY [[DEF]](s16)
139+
; CHECK-NEXT: $h2 = COPY [[DEF]](s16)
140+
; CHECK-NEXT: $h3 = COPY [[DEF]](s16)
143141
%0:_(s32) = G_IMPLICIT_DEF
144142
%1:_(s32) = G_IMPLICIT_DEF
145143
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -544,7 +542,7 @@ body: |
544542
bb.1:
545543
; CHECK-LABEL: name: test_merge_undef
546544
; CHECK: %opaque:_(s64) = COPY $x0
547-
; CHECK-NEXT: %me:_(s128) = G_ZEXT %opaque(s64)
545+
; CHECK-NEXT: %me:_(s128) = G_ANYEXT %opaque(s64)
548546
; CHECK-NEXT: $q0 = COPY %me(s128)
549547
%opaque:_(s64) = COPY $x0
550548
%def:_(s64) = G_IMPLICIT_DEF
@@ -560,7 +558,7 @@ body: |
560558
; CHECK-LABEL: name: test_merge_undef_multi_use
561559
; CHECK: %opaque:_(s64) = COPY $x0
562560
; CHECK-NEXT: %def:_(s64) = G_IMPLICIT_DEF
563-
; CHECK-NEXT: %me:_(s128) = G_ZEXT %opaque(s64)
561+
; CHECK-NEXT: %me:_(s128) = G_ANYEXT %opaque(s64)
564562
; CHECK-NEXT: $q0 = COPY %me(s128)
565563
; CHECK-NEXT: $x0 = COPY %def(s64)
566564
%opaque:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -45,14 +45,24 @@ define i64 @bswap_i16_to_i64_anyext(i16 %a) {
4545

4646
; The zext here is optimised to an any_extend during isel.
4747
define i128 @bswap_i16_to_i128_anyext(i16 %a) {
48-
; CHECK-LABEL: bswap_i16_to_i128_anyext:
49-
; CHECK: // %bb.0:
50-
; CHECK-NEXT: mov w8, w0
51-
; CHECK-NEXT: mov x0, xzr
52-
; CHECK-NEXT: rev w8, w8
53-
; CHECK-NEXT: lsr w8, w8, #16
54-
; CHECK-NEXT: lsl x1, x8, #48
55-
; CHECK-NEXT: ret
48+
; CHECK-SD-LABEL: bswap_i16_to_i128_anyext:
49+
; CHECK-SD: // %bb.0:
50+
; CHECK-SD-NEXT: mov w8, w0
51+
; CHECK-SD-NEXT: mov x0, xzr
52+
; CHECK-SD-NEXT: rev w8, w8
53+
; CHECK-SD-NEXT: lsr w8, w8, #16
54+
; CHECK-SD-NEXT: lsl x1, x8, #48
55+
; CHECK-SD-NEXT: ret
56+
;
57+
; CHECK-GI-LABEL: bswap_i16_to_i128_anyext:
58+
; CHECK-GI: // %bb.0:
59+
; CHECK-GI-NEXT: mov w8, w0
60+
; CHECK-GI-NEXT: mov x0, xzr
61+
; CHECK-GI-NEXT: rev w8, w8
62+
; CHECK-GI-NEXT: lsr w8, w8, #16
63+
; CHECK-GI-NEXT: and x8, x8, #0xffff
64+
; CHECK-GI-NEXT: lsl x1, x8, #48
65+
; CHECK-GI-NEXT: ret
5666
%3 = call i16 @llvm.bswap.i16(i16 %a)
5767
%4 = zext i16 %3 to i128
5868
%5 = shl i128 %4, 112

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1884,22 +1884,22 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
18841884
define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
18851885
; GCN-LABEL: s_ashr_i65_33:
18861886
; GCN: ; %bb.0:
1887-
; GCN-NEXT: s_mov_b32 s3, 0
1888-
; GCN-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1889-
; GCN-NEXT: s_lshr_b32 s2, s1, 1
1890-
; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1891-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1892-
; GCN-NEXT: s_ashr_i32 s2, s5, 1
1887+
; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1888+
; GCN-NEXT: s_lshr_b32 s0, s1, 1
1889+
; GCN-NEXT: s_mov_b32 s1, 0
1890+
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1891+
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1892+
; GCN-NEXT: s_ashr_i32 s2, s3, 1
18931893
; GCN-NEXT: ; return to shader part epilog
18941894
;
18951895
; GFX10PLUS-LABEL: s_ashr_i65_33:
18961896
; GFX10PLUS: ; %bb.0:
1897-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1898-
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1899-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1900-
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1901-
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1902-
; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1
1897+
; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1898+
; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1899+
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1900+
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1901+
; GFX10PLUS-NEXT: s_ashr_i32 s2, s3, 1
1902+
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
19031903
; GFX10PLUS-NEXT: ; return to shader part epilog
19041904
%result = ashr i65 %value, 33
19051905
ret i65 %result

llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll

Lines changed: 41 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1574,8 +1574,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15741574
; GFX6-LABEL: v_lshr_i65:
15751575
; GFX6: ; %bb.0:
15761576
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577-
; GFX6-NEXT: v_mov_b32_e32 v5, 0
15781577
; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
1578+
; GFX6-NEXT: v_mov_b32_e32 v5, 0
15791579
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
15801580
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
15811581
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
@@ -1596,8 +1596,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15961596
; GFX8-LABEL: v_lshr_i65:
15971597
; GFX8: ; %bb.0:
15981598
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599-
; GFX8-NEXT: v_mov_b32_e32 v5, 0
16001599
; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
1600+
; GFX8-NEXT: v_mov_b32_e32 v5, 0
16011601
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
16021602
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
16031603
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1618,8 +1618,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
16181618
; GFX9-LABEL: v_lshr_i65:
16191619
; GFX9: ; %bb.0:
16201620
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621-
; GFX9-NEXT: v_mov_b32_e32 v5, 0
16221621
; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
1622+
; GFX9-NEXT: v_mov_b32_e32 v5, 0
16231623
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
16241624
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
16251625
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1688,8 +1688,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
16881688
; GFX6: ; %bb.0:
16891689
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16901690
; GFX6-NEXT: v_mov_b32_e32 v3, v1
1691-
; GFX6-NEXT: v_mov_b32_e32 v1, 0
16921691
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
1692+
; GFX6-NEXT: v_mov_b32_e32 v1, 0
16931693
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
16941694
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
16951695
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1700,8 +1700,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17001700
; GFX8: ; %bb.0:
17011701
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17021702
; GFX8-NEXT: v_mov_b32_e32 v3, v1
1703-
; GFX8-NEXT: v_mov_b32_e32 v1, 0
17041703
; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
1704+
; GFX8-NEXT: v_mov_b32_e32 v1, 0
17051705
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17061706
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17071707
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1712,8 +1712,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17121712
; GFX9: ; %bb.0:
17131713
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17141714
; GFX9-NEXT: v_mov_b32_e32 v3, v1
1715-
; GFX9-NEXT: v_mov_b32_e32 v1, 0
17161715
; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
1716+
; GFX9-NEXT: v_mov_b32_e32 v1, 0
17171717
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17181718
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17191719
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1749,22 +1749,20 @@ define i65 @v_lshr_i65_33(i65 %value) {
17491749
define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17501750
; GCN-LABEL: s_lshr_i65:
17511751
; GCN: ; %bb.0:
1752-
; GCN-NEXT: s_mov_b32 s4, s3
1753-
; GCN-NEXT: s_mov_b32 s3, 0
1754-
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1755-
; GCN-NEXT: s_sub_i32 s10, s4, 64
1756-
; GCN-NEXT: s_sub_i32 s8, 64, s4
1757-
; GCN-NEXT: s_cmp_lt_u32 s4, 64
1752+
; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1753+
; GCN-NEXT: s_sub_i32 s10, s3, 64
1754+
; GCN-NEXT: s_sub_i32 s8, 64, s3
1755+
; GCN-NEXT: s_cmp_lt_u32 s3, 64
17581756
; GCN-NEXT: s_cselect_b32 s11, 1, 0
1759-
; GCN-NEXT: s_cmp_eq_u32 s4, 0
1757+
; GCN-NEXT: s_cmp_eq_u32 s3, 0
17601758
; GCN-NEXT: s_cselect_b32 s12, 1, 0
1761-
; GCN-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
1762-
; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
1763-
; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
1764-
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
1765-
; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
1759+
; GCN-NEXT: s_lshr_b64 s[6:7], s[4:5], s3
1760+
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1761+
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1762+
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1763+
; GCN-NEXT: s_lshr_b64 s[4:5], s[4:5], s10
17661764
; GCN-NEXT: s_cmp_lg_u32 s11, 0
1767-
; GCN-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
1765+
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
17681766
; GCN-NEXT: s_cmp_lg_u32 s12, 0
17691767
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
17701768
; GCN-NEXT: s_cmp_lg_u32 s11, 0
@@ -1773,26 +1771,24 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17731771
;
17741772
; GFX10PLUS-LABEL: s_lshr_i65:
17751773
; GFX10PLUS: ; %bb.0:
1776-
; GFX10PLUS-NEXT: s_mov_b32 s4, s3
1777-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1778-
; GFX10PLUS-NEXT: s_sub_i32 s10, s4, 64
1779-
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1780-
; GFX10PLUS-NEXT: s_sub_i32 s5, 64, s4
1781-
; GFX10PLUS-NEXT: s_cmp_lt_u32 s4, 64
1774+
; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1775+
; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1776+
; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
1777+
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
17821778
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
1783-
; GFX10PLUS-NEXT: s_cmp_eq_u32 s4, 0
1779+
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
17841780
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1785-
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
1786-
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[2:3], s5
1787-
; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[2:3], s4
1781+
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
1782+
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
1783+
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[4:5], s3
17881784
; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1789-
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
1785+
; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[4:5], s10
17901786
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1791-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
1787+
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
17921788
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1793-
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1789+
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
17941790
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1795-
; GFX10PLUS-NEXT: s_cselect_b32 s2, s4, 0
1791+
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
17961792
; GFX10PLUS-NEXT: ; return to shader part epilog
17971793
%result = lshr i65 %value, %amount
17981794
ret i65 %result
@@ -1801,22 +1797,22 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
18011797
define amdgpu_ps i65 @s_lshr_i65_33(i65 inreg %value) {
18021798
; GCN-LABEL: s_lshr_i65_33:
18031799
; GCN: ; %bb.0:
1804-
; GCN-NEXT: s_mov_b32 s3, 0
1805-
; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1806-
; GCN-NEXT: s_lshr_b32 s2, s1, 1
1807-
; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1808-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1809-
; GCN-NEXT: s_lshr_b32 s2, s5, 1
1800+
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1801+
; GCN-NEXT: s_lshr_b32 s0, s1, 1
1802+
; GCN-NEXT: s_mov_b32 s1, 0
1803+
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1804+
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1805+
; GCN-NEXT: s_lshr_b32 s2, s3, 1
18101806
; GCN-NEXT: ; return to shader part epilog
18111807
;
18121808
; GFX10PLUS-LABEL: s_lshr_i65_33:
18131809
; GFX10PLUS: ; %bb.0:
1814-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1815-
; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1816-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1817-
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1818-
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1819-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s5, 1
1810+
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1811+
; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1812+
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1813+
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1814+
; GFX10PLUS-NEXT: s_lshr_b32 s2, s3, 1
1815+
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
18201816
; GFX10PLUS-NEXT: ; return to shader part epilog
18211817
%result = lshr i65 %value, 33
18221818
ret i65 %result

llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Lines changed: 16 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1440,7 +1440,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14401440
; GFX6-LABEL: v_sext_inreg_i65_22:
14411441
; GFX6: ; %bb.0:
14421442
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1443-
; GFX6-NEXT: v_mov_b32_e32 v3, 0
14441443
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 22
14451444
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14461445
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
@@ -1456,7 +1455,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14561455
; GFX8-LABEL: v_sext_inreg_i65_22:
14571456
; GFX8: ; %bb.0:
14581457
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1459-
; GFX8-NEXT: v_mov_b32_e32 v3, 0
14601458
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
14611459
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14621460
; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
@@ -1472,7 +1470,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14721470
; GFX9-LABEL: v_sext_inreg_i65_22:
14731471
; GFX9: ; %bb.0:
14741472
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1475-
; GFX9-NEXT: v_mov_b32_e32 v3, 0
14761473
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
14771474
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14781475
; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
@@ -1487,7 +1484,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14871484
; GFX10PLUS-LABEL: v_sext_inreg_i65_22:
14881485
; GFX10PLUS: ; %bb.0:
14891486
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1490-
; GFX10PLUS-NEXT: v_mov_b32_e32 v3, 0
14911487
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
14921488
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14931489
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
@@ -1559,29 +1555,29 @@ define i65 @v_sext_inreg_i65_33(i65 %value) {
15591555
define amdgpu_ps i65 @s_sext_inreg_i65_18(i65 inreg %value) {
15601556
; GCN-LABEL: s_sext_inreg_i65_18:
15611557
; GCN: ; %bb.0:
1562-
; GCN-NEXT: s_mov_b32 s3, 0
1563-
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 18
1564-
; GCN-NEXT: s_lshr_b32 s2, s1, 14
1565-
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[2:3]
1566-
; GCN-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1558+
; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1559+
; GCN-NEXT: s_lshr_b32 s4, s1, 14
1560+
; GCN-NEXT: s_mov_b32 s5, 0
1561+
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1562+
; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
15671563
; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1568-
; GCN-NEXT: s_lshl_b32 s7, s4, 14
1569-
; GCN-NEXT: s_mov_b32 s6, s3
1564+
; GCN-NEXT: s_lshl_b32 s7, s2, 14
1565+
; GCN-NEXT: s_mov_b32 s6, s5
15701566
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
1571-
; GCN-NEXT: s_ashr_i64 s[2:3], s[4:5], 18
1567+
; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
15721568
; GCN-NEXT: ; return to shader part epilog
15731569
;
15741570
; GFX10PLUS-LABEL: s_sext_inreg_i65_18:
15751571
; GFX10PLUS: ; %bb.0:
1576-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1577-
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 18
1578-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 14
1572+
; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], 18
1573+
; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 14
1574+
; GFX10PLUS-NEXT: s_mov_b32 s5, 0
15791575
; GFX10PLUS-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000
1580-
; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[4:5], s[2:3]
1581-
; GFX10PLUS-NEXT: s_mov_b32 s6, s3
1582-
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000
1583-
; GFX10PLUS-NEXT: s_lshl_b32 s7, s4, 14
1584-
; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], 18
1576+
; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5]
1577+
; GFX10PLUS-NEXT: s_mov_b32 s6, s5
1578+
; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1579+
; GFX10PLUS-NEXT: s_lshl_b32 s7, s2, 14
1580+
; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], 18
15851581
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7]
15861582
; GFX10PLUS-NEXT: ; return to shader part epilog
15871583
%shl = shl i65 %value, 18

0 commit comments

Comments
 (0)