Skip to content

Commit 7956211

Browse files
author
Thorsten Schütt
committed
address review comments
1 parent ad9a283 commit 7956211

File tree

8 files changed

+121
-130
lines changed

8 files changed

+121
-130
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelperArtifacts.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -37,23 +37,22 @@ bool CombinerHelper::matchMergeXAndUndef(const MachineInstr &MI,
3737
LLT SrcTy = MRI.getType(Merge->getSourceReg(0));
3838

3939
// Otherwise, we would miscompile.
40-
if (Merge->getNumSources() > 2)
41-
return false;
40+
assert(Merge->getNumSources() == 2 && "Unexpected number of operands");
4241

4342
//
4443
// %bits_8_15:_(s8) = G_IMPLICIT_DEF
4544
// %0:_(s16) = G_MERGE_VALUES %bits_0_7:(s8), %bits_8_15:(s8)
4645
//
4746
// ->
4847
//
49-
// %0:_(s16) = G_ZEXT %bits_0_7:(s8)
48+
// %0:_(s16) = G_ANYEXT %bits_0_7:(s8)
5049
//
5150

52-
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {DstTy, SrcTy}}))
51+
if (!isLegalOrBeforeLegalizer({TargetOpcode::G_ANYEXT, {DstTy, SrcTy}}))
5352
return false;
5453

5554
MatchInfo = [=](MachineIRBuilder &B) {
56-
B.buildZExt(Dst, Merge->getSourceReg(0));
55+
B.buildAnyExt(Dst, Merge->getSourceReg(0));
5756
};
5857
return true;
5958
}

llvm/test/CodeGen/AArch64/GlobalISel/combine-unmerge.mir

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,8 @@ body: |
1010
bb.1:
1111
; CHECK-LABEL: name: test_combine_unmerge_merge
1212
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
13-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
1413
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
15-
; CHECK-NEXT: $w1 = COPY [[C]](s32)
14+
; CHECK-NEXT: $w1 = COPY [[DEF]](s32)
1615
%0:_(s32) = G_IMPLICIT_DEF
1716
%1:_(s32) = G_IMPLICIT_DEF
1817
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -115,11 +114,8 @@ body: |
115114
bb.1:
116115
; CHECK-LABEL: name: test_combine_unmerge_bitcast_merge
117116
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
118-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s32)
119-
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[ZEXT]](s64)
120-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]](<2 x s32>)
121-
; CHECK-NEXT: $w0 = COPY [[UV]](s32)
122-
; CHECK-NEXT: $w1 = COPY [[UV1]](s32)
117+
; CHECK-NEXT: $w0 = COPY [[DEF]](s32)
118+
; CHECK-NEXT: $w1 = COPY [[DEF]](s32)
123119
%0:_(s32) = G_IMPLICIT_DEF
124120
%1:_(s32) = G_IMPLICIT_DEF
125121
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -137,13 +133,11 @@ name: test_combine_unmerge_merge_incompatible_types
137133
body: |
138134
bb.1:
139135
; CHECK-LABEL: name: test_combine_unmerge_merge_incompatible_types
140-
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
141-
; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[DEF]](s32)
142-
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[ZEXT]](s64)
143-
; CHECK-NEXT: $h0 = COPY [[UV]](s16)
144-
; CHECK-NEXT: $h1 = COPY [[UV1]](s16)
145-
; CHECK-NEXT: $h2 = COPY [[UV2]](s16)
146-
; CHECK-NEXT: $h3 = COPY [[UV3]](s16)
136+
; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
137+
; CHECK-NEXT: $h0 = COPY [[DEF]](s16)
138+
; CHECK-NEXT: $h1 = COPY [[DEF]](s16)
139+
; CHECK-NEXT: $h2 = COPY [[DEF]](s16)
140+
; CHECK-NEXT: $h3 = COPY [[DEF]](s16)
147141
%0:_(s32) = G_IMPLICIT_DEF
148142
%1:_(s32) = G_IMPLICIT_DEF
149143
%2:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
@@ -548,7 +542,7 @@ body: |
548542
bb.1:
549543
; CHECK-LABEL: name: test_merge_undef
550544
; CHECK: %opaque:_(s64) = COPY $x0
551-
; CHECK-NEXT: %me:_(s128) = G_ZEXT %opaque(s64)
545+
; CHECK-NEXT: %me:_(s128) = G_ANYEXT %opaque(s64)
552546
; CHECK-NEXT: $q0 = COPY %me(s128)
553547
%opaque:_(s64) = COPY $x0
554548
%def:_(s64) = G_IMPLICIT_DEF
@@ -564,7 +558,7 @@ body: |
564558
; CHECK-LABEL: name: test_merge_undef_multi_use
565559
; CHECK: %opaque:_(s64) = COPY $x0
566560
; CHECK-NEXT: %def:_(s64) = G_IMPLICIT_DEF
567-
; CHECK-NEXT: %me:_(s128) = G_ZEXT %opaque(s64)
561+
; CHECK-NEXT: %me:_(s128) = G_ANYEXT %opaque(s64)
568562
; CHECK-NEXT: $q0 = COPY %me(s128)
569563
; CHECK-NEXT: $x0 = COPY %def(s64)
570564
%opaque:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/bswap.ll

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,24 @@ define i64 @bswap_i16_to_i64_anyext(i16 %a) {
4545

4646
; The zext here is optimised to an any_extend during isel.
4747
define i128 @bswap_i16_to_i128_anyext(i16 %a) {
48-
; CHECK-LABEL: bswap_i16_to_i128_anyext:
49-
; CHECK: // %bb.0:
50-
; CHECK-NEXT: mov w8, w0
51-
; CHECK-NEXT: mov x0, xzr
52-
; CHECK-NEXT: rev w8, w8
53-
; CHECK-NEXT: lsr w8, w8, #16
54-
; CHECK-NEXT: lsl x1, x8, #48
55-
; CHECK-NEXT: ret
48+
; CHECK-SD-LABEL: bswap_i16_to_i128_anyext:
49+
; CHECK-SD: // %bb.0:
50+
; CHECK-SD-NEXT: mov w8, w0
51+
; CHECK-SD-NEXT: mov x0, xzr
52+
; CHECK-SD-NEXT: rev w8, w8
53+
; CHECK-SD-NEXT: lsr w8, w8, #16
54+
; CHECK-SD-NEXT: lsl x1, x8, #48
55+
; CHECK-SD-NEXT: ret
56+
;
57+
; CHECK-GI-LABEL: bswap_i16_to_i128_anyext:
58+
; CHECK-GI: // %bb.0:
59+
; CHECK-GI-NEXT: mov w8, w0
60+
; CHECK-GI-NEXT: mov x0, xzr
61+
; CHECK-GI-NEXT: rev w8, w8
62+
; CHECK-GI-NEXT: lsr w8, w8, #16
63+
; CHECK-GI-NEXT: and x8, x8, #0xffff
64+
; CHECK-GI-NEXT: lsl x1, x8, #48
65+
; CHECK-GI-NEXT: ret
5666
%3 = call i16 @llvm.bswap.i16(i16 %a)
5767
%4 = zext i16 %3 to i128
5868
%5 = shl i128 %4, 112

llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,22 +1884,22 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
18841884
define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) {
18851885
; GCN-LABEL: s_ashr_i65_33:
18861886
; GCN: ; %bb.0:
1887-
; GCN-NEXT: s_mov_b32 s3, 0
1888-
; GCN-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1889-
; GCN-NEXT: s_lshr_b32 s2, s1, 1
1890-
; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1891-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1892-
; GCN-NEXT: s_ashr_i32 s2, s5, 1
1887+
; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1888+
; GCN-NEXT: s_lshr_b32 s0, s1, 1
1889+
; GCN-NEXT: s_mov_b32 s1, 0
1890+
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1891+
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1892+
; GCN-NEXT: s_ashr_i32 s2, s3, 1
18931893
; GCN-NEXT: ; return to shader part epilog
18941894
;
18951895
; GFX10PLUS-LABEL: s_ashr_i65_33:
18961896
; GFX10PLUS: ; %bb.0:
1897-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1898-
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
1899-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1900-
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1901-
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1902-
; GFX10PLUS-NEXT: s_ashr_i32 s2, s5, 1
1897+
; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000
1898+
; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1899+
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1900+
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1901+
; GFX10PLUS-NEXT: s_ashr_i32 s2, s3, 1
1902+
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
19031903
; GFX10PLUS-NEXT: ; return to shader part epilog
19041904
%result = ashr i65 %value, 33
19051905
ret i65 %result

llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll

Lines changed: 41 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1574,8 +1574,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15741574
; GFX6-LABEL: v_lshr_i65:
15751575
; GFX6: ; %bb.0:
15761576
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577-
; GFX6-NEXT: v_mov_b32_e32 v5, 0
15781577
; GFX6-NEXT: v_and_b32_e32 v4, 1, v2
1578+
; GFX6-NEXT: v_mov_b32_e32 v5, 0
15791579
; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 64, v3
15801580
; GFX6-NEXT: v_add_i32_e32 v2, vcc, 0xffffffc0, v3
15811581
; GFX6-NEXT: v_lshr_b64 v[6:7], v[0:1], v3
@@ -1596,8 +1596,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
15961596
; GFX8-LABEL: v_lshr_i65:
15971597
; GFX8: ; %bb.0:
15981598
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599-
; GFX8-NEXT: v_mov_b32_e32 v5, 0
16001599
; GFX8-NEXT: v_and_b32_e32 v4, 1, v2
1600+
; GFX8-NEXT: v_mov_b32_e32 v5, 0
16011601
; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 64, v3
16021602
; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xffffffc0, v3
16031603
; GFX8-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1618,8 +1618,8 @@ define i65 @v_lshr_i65(i65 %value, i65 %amount) {
16181618
; GFX9-LABEL: v_lshr_i65:
16191619
; GFX9: ; %bb.0:
16201620
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1621-
; GFX9-NEXT: v_mov_b32_e32 v5, 0
16221621
; GFX9-NEXT: v_and_b32_e32 v4, 1, v2
1622+
; GFX9-NEXT: v_mov_b32_e32 v5, 0
16231623
; GFX9-NEXT: v_sub_u32_e32 v8, 64, v3
16241624
; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffc0, v3
16251625
; GFX9-NEXT: v_lshrrev_b64 v[6:7], v3, v[0:1]
@@ -1688,8 +1688,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
16881688
; GFX6: ; %bb.0:
16891689
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16901690
; GFX6-NEXT: v_mov_b32_e32 v3, v1
1691-
; GFX6-NEXT: v_mov_b32_e32 v1, 0
16921691
; GFX6-NEXT: v_and_b32_e32 v0, 1, v2
1692+
; GFX6-NEXT: v_mov_b32_e32 v1, 0
16931693
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
16941694
; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v3
16951695
; GFX6-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1700,8 +1700,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17001700
; GFX8: ; %bb.0:
17011701
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17021702
; GFX8-NEXT: v_mov_b32_e32 v3, v1
1703-
; GFX8-NEXT: v_mov_b32_e32 v1, 0
17041703
; GFX8-NEXT: v_and_b32_e32 v0, 1, v2
1704+
; GFX8-NEXT: v_mov_b32_e32 v1, 0
17051705
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17061706
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17071707
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1712,8 +1712,8 @@ define i65 @v_lshr_i65_33(i65 %value) {
17121712
; GFX9: ; %bb.0:
17131713
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
17141714
; GFX9-NEXT: v_mov_b32_e32 v3, v1
1715-
; GFX9-NEXT: v_mov_b32_e32 v1, 0
17161715
; GFX9-NEXT: v_and_b32_e32 v0, 1, v2
1716+
; GFX9-NEXT: v_mov_b32_e32 v1, 0
17171717
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
17181718
; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v3
17191719
; GFX9-NEXT: v_or_b32_e32 v0, v2, v0
@@ -1749,22 +1749,20 @@ define i65 @v_lshr_i65_33(i65 %value) {
17491749
define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17501750
; GCN-LABEL: s_lshr_i65:
17511751
; GCN: ; %bb.0:
1752-
; GCN-NEXT: s_mov_b32 s4, s3
1753-
; GCN-NEXT: s_mov_b32 s3, 0
1754-
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1755-
; GCN-NEXT: s_sub_i32 s10, s4, 64
1756-
; GCN-NEXT: s_sub_i32 s8, 64, s4
1757-
; GCN-NEXT: s_cmp_lt_u32 s4, 64
1752+
; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1753+
; GCN-NEXT: s_sub_i32 s10, s3, 64
1754+
; GCN-NEXT: s_sub_i32 s8, 64, s3
1755+
; GCN-NEXT: s_cmp_lt_u32 s3, 64
17581756
; GCN-NEXT: s_cselect_b32 s11, 1, 0
1759-
; GCN-NEXT: s_cmp_eq_u32 s4, 0
1757+
; GCN-NEXT: s_cmp_eq_u32 s3, 0
17601758
; GCN-NEXT: s_cselect_b32 s12, 1, 0
1761-
; GCN-NEXT: s_lshr_b64 s[6:7], s[2:3], s4
1762-
; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
1763-
; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s8
1764-
; GCN-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
1765-
; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
1759+
; GCN-NEXT: s_lshr_b64 s[6:7], s[4:5], s3
1760+
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
1761+
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
1762+
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
1763+
; GCN-NEXT: s_lshr_b64 s[4:5], s[4:5], s10
17661764
; GCN-NEXT: s_cmp_lg_u32 s11, 0
1767-
; GCN-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3]
1765+
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
17681766
; GCN-NEXT: s_cmp_lg_u32 s12, 0
17691767
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
17701768
; GCN-NEXT: s_cmp_lg_u32 s11, 0
@@ -1773,26 +1771,24 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
17731771
;
17741772
; GFX10PLUS-LABEL: s_lshr_i65:
17751773
; GFX10PLUS: ; %bb.0:
1776-
; GFX10PLUS-NEXT: s_mov_b32 s4, s3
1777-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1778-
; GFX10PLUS-NEXT: s_sub_i32 s10, s4, 64
1779-
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1780-
; GFX10PLUS-NEXT: s_sub_i32 s5, 64, s4
1781-
; GFX10PLUS-NEXT: s_cmp_lt_u32 s4, 64
1774+
; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1775+
; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
1776+
; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
1777+
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
17821778
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
1783-
; GFX10PLUS-NEXT: s_cmp_eq_u32 s4, 0
1779+
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
17841780
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
1785-
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s4
1786-
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[2:3], s5
1787-
; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[2:3], s4
1781+
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
1782+
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
1783+
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[4:5], s3
17881784
; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
1789-
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[2:3], s10
1785+
; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[4:5], s10
17901786
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1791-
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3]
1787+
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
17921788
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
1793-
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
1789+
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
17941790
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
1795-
; GFX10PLUS-NEXT: s_cselect_b32 s2, s4, 0
1791+
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
17961792
; GFX10PLUS-NEXT: ; return to shader part epilog
17971793
%result = lshr i65 %value, %amount
17981794
ret i65 %result
@@ -1801,22 +1797,22 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
18011797
define amdgpu_ps i65 @s_lshr_i65_33(i65 inreg %value) {
18021798
; GCN-LABEL: s_lshr_i65_33:
18031799
; GCN: ; %bb.0:
1804-
; GCN-NEXT: s_mov_b32 s3, 0
1805-
; GCN-NEXT: s_and_b64 s[4:5], s[2:3], 1
1806-
; GCN-NEXT: s_lshr_b32 s2, s1, 1
1807-
; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1808-
; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1809-
; GCN-NEXT: s_lshr_b32 s2, s5, 1
1800+
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], 1
1801+
; GCN-NEXT: s_lshr_b32 s0, s1, 1
1802+
; GCN-NEXT: s_mov_b32 s1, 0
1803+
; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1804+
; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
1805+
; GCN-NEXT: s_lshr_b32 s2, s3, 1
18101806
; GCN-NEXT: ; return to shader part epilog
18111807
;
18121808
; GFX10PLUS-LABEL: s_lshr_i65_33:
18131809
; GFX10PLUS: ; %bb.0:
1814-
; GFX10PLUS-NEXT: s_mov_b32 s3, 0
1815-
; GFX10PLUS-NEXT: s_and_b64 s[4:5], s[2:3], 1
1816-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s1, 1
1817-
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[4:5], 31
1818-
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
1819-
; GFX10PLUS-NEXT: s_lshr_b32 s2, s5, 1
1810+
; GFX10PLUS-NEXT: s_and_b64 s[2:3], s[2:3], 1
1811+
; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1
1812+
; GFX10PLUS-NEXT: s_mov_b32 s1, 0
1813+
; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31
1814+
; GFX10PLUS-NEXT: s_lshr_b32 s2, s3, 1
1815+
; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
18201816
; GFX10PLUS-NEXT: ; return to shader part epilog
18211817
%result = lshr i65 %value, 33
18221818
ret i65 %result

0 commit comments

Comments
 (0)