Skip to content

Commit b5c4592

Browse files
author
Thorsten Schütt
committed
address review comments
re-add lost combine
1 parent a6b9d99 commit b5c4592

File tree

11 files changed

+67
-165
lines changed

11 files changed

+67
-165
lines changed

llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -918,7 +918,7 @@ class CombinerHelper {
918918
bool matchCanonicalizeICmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
919919
bool matchCanonicalizeFCmp(const MachineInstr &MI, BuildFnTy &MatchInfo);
920920

921-
// unmerge_values anyext build vector
921+
// unmerge_values(anyext(build vector)) -> build vector(anyext)
922922
bool matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
923923
BuildFnTy &MatchInfo);
924924

llvm/include/llvm/Target/GlobalISel/Combine.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1926,7 +1926,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
19261926
reassocs, ptr_add_immed_chain, cmp_combines,
19271927
shl_ashr_to_sext_inreg, sext_inreg_of_load,
19281928
width_reduction_combines, select_combines,
1929-
known_bits_simplifications,
1929+
known_bits_simplifications, trunc_shift,
19301930
not_cmp_fold, opt_brcond_by_inverting_cond,
19311931
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
19321932
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7648,21 +7648,21 @@ bool CombinerHelper::matchUnmergeValuesAnyExtBuildVector(const MachineInstr &MI,
76487648
{TargetOpcode::G_BUILD_VECTOR, {SmallBvTy, SmallBvElemenTy}}))
76497649
return false;
76507650

7651-
// check scalar anyext
7651+
// We check the legality of scalar anyext.
76527652
if (!isLegalOrBeforeLegalizer(
76537653
{TargetOpcode::G_ANYEXT,
76547654
{SmallBvElemenTy, BigBvTy.getElementType()}}))
76557655
return false;
76567656

76577657
MatchInfo = [=](MachineIRBuilder &B) {
7658-
// build into each G_UNMERGE_VALUES def
7659-
// a small build vector with anyext from the source build vector
7658+
// Build into each G_UNMERGE_VALUES def
7659+
// a small build vector with anyext from the source build vector.
76607660
for (unsigned I = 0; I < Unmerge->getNumDefs(); ++I) {
76617661
SmallVector<Register> Ops;
76627662
for (unsigned J = 0; J < SmallBvTy.getNumElements(); ++J) {
7663-
auto AnyExt = B.buildAnyExt(
7664-
SmallBvElemenTy,
7665-
BV->getSourceReg(I * SmallBvTy.getNumElements() + J));
7663+
Register SourceArray =
7664+
BV->getSourceReg(I * SmallBvTy.getNumElements() + J);
7665+
auto AnyExt = B.buildAnyExt(SmallBvElemenTy, SourceArray);
76667666
Ops.push_back(AnyExt.getReg(0));
76677667
}
76687668
B.buildBuildVector(Unmerge->getOperand(I).getReg(), Ops);

llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,9 @@ body: |
2626
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
2727
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = nsw G_SHL [[LOAD]], [[C1]](s32)
2828
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[SHL]], [[C]]
29-
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
30-
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = nsw G_SHL [[MUL]], [[C2]](s32)
31-
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[SHL1]](s32)
32-
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
33-
; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C3]](s64)
34-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL2]](s64)
35-
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
29+
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
30+
; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[MUL]], [[C2]](s64)
31+
; CHECK-NEXT: $w0 = COPY [[SHL1]](s32)
3632
; CHECK-NEXT: RET_ReallyLR implicit $w0
3733
bb.1:
3834
liveins: $x0

llvm/test/CodeGen/AArch64/GlobalISel/combine-trunc.mir

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,23 @@ legalized: true
141141
body: |
142142
bb.1:
143143
liveins: $w0
144-
; CHECK-LABEL: name: test_combine_trunc_shl_s32_by_2
145-
; CHECK: liveins: $w0
146-
; CHECK-NEXT: {{ $}}
147-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
148-
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
149-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
150-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
151-
; CHECK-NEXT: $h0 = COPY [[TRUNC]](s16)
144+
; CHECK-PRE-LABEL: name: test_combine_trunc_shl_s32_by_2
145+
; CHECK-PRE: liveins: $w0
146+
; CHECK-PRE-NEXT: {{ $}}
147+
; CHECK-PRE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
148+
; CHECK-PRE-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
149+
; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
150+
; CHECK-PRE-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s32)
151+
; CHECK-PRE-NEXT: $h0 = COPY [[SHL]](s16)
152+
;
153+
; CHECK-POST-LABEL: name: test_combine_trunc_shl_s32_by_2
154+
; CHECK-POST: liveins: $w0
155+
; CHECK-POST-NEXT: {{ $}}
156+
; CHECK-POST-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
157+
; CHECK-POST-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
158+
; CHECK-POST-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
159+
; CHECK-POST-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
160+
; CHECK-POST-NEXT: $h0 = COPY [[TRUNC]](s16)
152161
%0:_(s32) = COPY $w0
153162
%1:_(s32) = G_CONSTANT i32 2
154163
%2:_(s32) = G_SHL %0(s32), %1(s32)

llvm/test/CodeGen/AArch64/setcc_knownbits.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,12 +57,18 @@ land.end: ; preds = %land.rhs, %entry
5757

5858
declare i64 @llvm.ctlz.i64(i64 %in, i1)
5959
define i1 @lshr_ctlz_undef_cmpeq_one_i64(i64 %in) {
60-
; CHECK-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
61-
; CHECK: // %bb.0:
62-
; CHECK-NEXT: clz x8, x0
63-
; CHECK-NEXT: lsr x0, x8, #6
64-
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
65-
; CHECK-NEXT: ret
60+
; CHECK-SD-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
61+
; CHECK-SD: // %bb.0:
62+
; CHECK-SD-NEXT: clz x8, x0
63+
; CHECK-SD-NEXT: lsr x0, x8, #6
64+
; CHECK-SD-NEXT: // kill: def $w0 killed $w0 killed $x0
65+
; CHECK-SD-NEXT: ret
66+
;
67+
; CHECK-GI-LABEL: lshr_ctlz_undef_cmpeq_one_i64:
68+
; CHECK-GI: // %bb.0:
69+
; CHECK-GI-NEXT: clz x8, x0
70+
; CHECK-GI-NEXT: lsr w0, w8, #6
71+
; CHECK-GI-NEXT: ret
6672
%ctlz = call i64 @llvm.ctlz.i64(i64 %in, i1 -1)
6773
%lshr = lshr i64 %ctlz, 6
6874
%icmp = icmp eq i64 %lshr, 1

llvm/test/CodeGen/AArch64/shift-logic.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,8 +280,7 @@ define void @apint_type_mismatch(i16 %a, ptr %p) {
280280
; CHECK-GISEL: // %bb.0: // %entry
281281
; CHECK-GISEL-NEXT: ubfx w8, w0, #3, #13
282282
; CHECK-GISEL-NEXT: and w8, w8, #0xff
283-
; CHECK-GISEL-NEXT: and x8, x8, #0xffff
284-
; CHECK-GISEL-NEXT: lsl x8, x8, #3
283+
; CHECK-GISEL-NEXT: lsl w8, w8, #3
285284
; CHECK-GISEL-NEXT: str w8, [x1]
286285
; CHECK-GISEL-NEXT: ret
287286
entry:

llvm/test/CodeGen/AMDGPU/GlobalISel/combine-trunc-shift.mir

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,9 @@ body: |
1414
; CHECK-NEXT: {{ $}}
1515
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
1616
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
17-
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[COPY]], [[C]](s32)
18-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[SHL]](s64)
19-
; CHECK-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
17+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
18+
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[TRUNC]], [[C]](s32)
19+
; CHECK-NEXT: $vgpr0 = COPY [[SHL]](s32)
2020
%0:_(s64) = COPY $vgpr0_vgpr1
2121
%1:_(s32) = G_CONSTANT i32 1
2222
%2:_(s64) = G_SHL %0:_, %1
@@ -82,10 +82,9 @@ body: |
8282
; CHECK: liveins: $vgpr0
8383
; CHECK-NEXT: {{ $}}
8484
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
85-
; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32)
8685
; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16
87-
; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32)
88-
; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64)
86+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32)
87+
; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[LSHR]](s32)
8988
; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55
9089
; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16)
9190
; CHECK-NEXT: $vgpr0 = COPY %keep(s32)
@@ -109,10 +108,9 @@ body: |
109108
; CHECK: liveins: $vgpr0
110109
; CHECK-NEXT: {{ $}}
111110
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
112-
; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32)
113111
; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 16
114-
; CHECK-NEXT: %shift:_(s64) = G_ASHR %src, %amt(s32)
115-
; CHECK-NEXT: %trunc:_(s16) = G_TRUNC %shift(s64)
112+
; CHECK-NEXT: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], %amt(s32)
113+
; CHECK-NEXT: %trunc:_(s16) = G_TRUNC [[ASHR]](s32)
116114
; CHECK-NEXT: %foo:_(s16) = G_CONSTANT i16 55
117115
; CHECK-NEXT: %keep:_(s32) = G_MERGE_VALUES %trunc(s16), %foo(s16)
118116
; CHECK-NEXT: $vgpr0 = COPY %keep(s32)
@@ -163,10 +161,9 @@ body: |
163161
; CHECK: liveins: $vgpr0
164162
; CHECK-NEXT: {{ $}}
165163
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
166-
; CHECK-NEXT: %src:_(s64) = G_ZEXT [[COPY]](s32)
167164
; CHECK-NEXT: %amt:_(s32) = G_CONSTANT i32 6
168-
; CHECK-NEXT: %shift:_(s64) = G_LSHR %src, %amt(s32)
169-
; CHECK-NEXT: %trunc:_(s26) = G_TRUNC %shift(s64)
165+
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], %amt(s32)
166+
; CHECK-NEXT: %trunc:_(s26) = G_TRUNC [[LSHR]](s32)
170167
; CHECK-NEXT: %foo:_(s26) = G_CONSTANT i26 55
171168
; CHECK-NEXT: %keep0:_(s26) = G_ADD %trunc, %foo
172169
; CHECK-NEXT: %keep1:_(s32) = G_ANYEXT %keep0(s26)

llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -47,23 +47,20 @@ define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) {
4747
;
4848
; GFX8-LABEL: s_sext_inreg_i8:
4949
; GFX8: ; %bb.0:
50-
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
5150
; GFX8-NEXT: s_lshl_b32 s0, s0, 3
5251
; GFX8-NEXT: s_sext_i32_i8 s0, s0
5352
; GFX8-NEXT: s_ashr_i32 s0, s0, 3
5453
; GFX8-NEXT: ; return to shader part epilog
5554
;
5655
; GFX9-LABEL: s_sext_inreg_i8:
5756
; GFX9: ; %bb.0:
58-
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
5957
; GFX9-NEXT: s_lshl_b32 s0, s0, 3
6058
; GFX9-NEXT: s_sext_i32_i8 s0, s0
6159
; GFX9-NEXT: s_ashr_i32 s0, s0, 3
6260
; GFX9-NEXT: ; return to shader part epilog
6361
;
6462
; GFX10PLUS-LABEL: s_sext_inreg_i8:
6563
; GFX10PLUS: ; %bb.0:
66-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff
6764
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 3
6865
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
6966
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 3
@@ -81,23 +78,20 @@ define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) {
8178
;
8279
; GFX8-LABEL: s_sext_inreg_i8_6:
8380
; GFX8: ; %bb.0:
84-
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
8581
; GFX8-NEXT: s_lshl_b32 s0, s0, 6
8682
; GFX8-NEXT: s_sext_i32_i8 s0, s0
8783
; GFX8-NEXT: s_ashr_i32 s0, s0, 6
8884
; GFX8-NEXT: ; return to shader part epilog
8985
;
9086
; GFX9-LABEL: s_sext_inreg_i8_6:
9187
; GFX9: ; %bb.0:
92-
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
9388
; GFX9-NEXT: s_lshl_b32 s0, s0, 6
9489
; GFX9-NEXT: s_sext_i32_i8 s0, s0
9590
; GFX9-NEXT: s_ashr_i32 s0, s0, 6
9691
; GFX9-NEXT: ; return to shader part epilog
9792
;
9893
; GFX10PLUS-LABEL: s_sext_inreg_i8_6:
9994
; GFX10PLUS: ; %bb.0:
100-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff
10195
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 6
10296
; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0
10397
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 6
@@ -584,23 +578,20 @@ define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) {
584578
;
585579
; GFX8-LABEL: s_sext_inreg_i16_9:
586580
; GFX8: ; %bb.0:
587-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
588581
; GFX8-NEXT: s_lshl_b32 s0, s0, 9
589582
; GFX8-NEXT: s_sext_i32_i16 s0, s0
590583
; GFX8-NEXT: s_ashr_i32 s0, s0, 9
591584
; GFX8-NEXT: ; return to shader part epilog
592585
;
593586
; GFX9-LABEL: s_sext_inreg_i16_9:
594587
; GFX9: ; %bb.0:
595-
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
596588
; GFX9-NEXT: s_lshl_b32 s0, s0, 9
597589
; GFX9-NEXT: s_sext_i32_i16 s0, s0
598590
; GFX9-NEXT: s_ashr_i32 s0, s0, 9
599591
; GFX9-NEXT: ; return to shader part epilog
600592
;
601593
; GFX10PLUS-LABEL: s_sext_inreg_i16_9:
602594
; GFX10PLUS: ; %bb.0:
603-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
604595
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9
605596
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
606597
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9
@@ -618,23 +609,20 @@ define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) {
618609
;
619610
; GFX8-LABEL: s_sext_inreg_i16_15:
620611
; GFX8: ; %bb.0:
621-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
622612
; GFX8-NEXT: s_lshl_b32 s0, s0, 15
623613
; GFX8-NEXT: s_sext_i32_i16 s0, s0
624614
; GFX8-NEXT: s_ashr_i32 s0, s0, 15
625615
; GFX8-NEXT: ; return to shader part epilog
626616
;
627617
; GFX9-LABEL: s_sext_inreg_i16_15:
628618
; GFX9: ; %bb.0:
629-
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
630619
; GFX9-NEXT: s_lshl_b32 s0, s0, 15
631620
; GFX9-NEXT: s_sext_i32_i16 s0, s0
632621
; GFX9-NEXT: s_ashr_i32 s0, s0, 15
633622
; GFX9-NEXT: ; return to shader part epilog
634623
;
635624
; GFX10PLUS-LABEL: s_sext_inreg_i16_15:
636625
; GFX10PLUS: ; %bb.0:
637-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
638626
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
639627
; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0
640628
; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15
@@ -732,7 +720,6 @@ define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) {
732720
; GFX8-LABEL: s_sext_inreg_v2i16_11:
733721
; GFX8: ; %bb.0:
734722
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
735-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
736723
; GFX8-NEXT: s_lshl_b32 s0, s0, 11
737724
; GFX8-NEXT: s_lshl_b32 s1, s1, 11
738725
; GFX8-NEXT: s_sext_i32_i16 s0, s0
@@ -867,9 +854,7 @@ define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) {
867854
; GFX8-LABEL: s_sext_inreg_v4i16_14:
868855
; GFX8: ; %bb.0:
869856
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
870-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
871857
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
872-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
873858
; GFX8-NEXT: s_lshl_b32 s0, s0, 14
874859
; GFX8-NEXT: s_lshl_b32 s2, s2, 14
875860
; GFX8-NEXT: s_lshl_b32 s1, s1, 14
@@ -1083,19 +1068,15 @@ define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) {
10831068
; GFX8-LABEL: s_sext_inreg_v8i16_5:
10841069
; GFX8: ; %bb.0:
10851070
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
1086-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
10871071
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
1088-
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
10891072
; GFX8-NEXT: s_lshl_b32 s0, s0, 5
10901073
; GFX8-NEXT: s_lshl_b32 s4, s4, 5
10911074
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
1092-
; GFX8-NEXT: s_and_b32 s2, s2, 0xffff
10931075
; GFX8-NEXT: s_lshl_b32 s1, s1, 5
10941076
; GFX8-NEXT: s_lshl_b32 s5, s5, 5
10951077
; GFX8-NEXT: s_sext_i32_i16 s0, s0
10961078
; GFX8-NEXT: s_sext_i32_i16 s4, s4
10971079
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
1098-
; GFX8-NEXT: s_and_b32 s3, s3, 0xffff
10991080
; GFX8-NEXT: s_lshl_b32 s2, s2, 5
11001081
; GFX8-NEXT: s_lshl_b32 s6, s6, 5
11011082
; GFX8-NEXT: s_sext_i32_i16 s1, s1

llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -91,26 +91,13 @@ define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) {
9191
}
9292

9393
define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
94-
; GFX6-LABEL: s_shl_i8_7:
95-
; GFX6: ; %bb.0:
96-
; GFX6-NEXT: s_lshl_b32 s0, s0, 7
97-
; GFX6-NEXT: ; return to shader part epilog
98-
;
99-
; GFX8-LABEL: s_shl_i8_7:
100-
; GFX8: ; %bb.0:
101-
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
102-
; GFX8-NEXT: s_lshl_b32 s0, s0, 7
103-
; GFX8-NEXT: ; return to shader part epilog
104-
;
105-
; GFX9-LABEL: s_shl_i8_7:
106-
; GFX9: ; %bb.0:
107-
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
108-
; GFX9-NEXT: s_lshl_b32 s0, s0, 7
109-
; GFX9-NEXT: ; return to shader part epilog
94+
; GCN-LABEL: s_shl_i8_7:
95+
; GCN: ; %bb.0:
96+
; GCN-NEXT: s_lshl_b32 s0, s0, 7
97+
; GCN-NEXT: ; return to shader part epilog
11098
;
11199
; GFX10PLUS-LABEL: s_shl_i8_7:
112100
; GFX10PLUS: ; %bb.0:
113-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff
114101
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
115102
; GFX10PLUS-NEXT: ; return to shader part epilog
116103
%result = shl i8 %value, 7
@@ -660,26 +647,13 @@ define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) {
660647
}
661648

662649
define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
663-
; GFX6-LABEL: s_shl_i16_15:
664-
; GFX6: ; %bb.0:
665-
; GFX6-NEXT: s_lshl_b32 s0, s0, 15
666-
; GFX6-NEXT: ; return to shader part epilog
667-
;
668-
; GFX8-LABEL: s_shl_i16_15:
669-
; GFX8: ; %bb.0:
670-
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
671-
; GFX8-NEXT: s_lshl_b32 s0, s0, 15
672-
; GFX8-NEXT: ; return to shader part epilog
673-
;
674-
; GFX9-LABEL: s_shl_i16_15:
675-
; GFX9: ; %bb.0:
676-
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
677-
; GFX9-NEXT: s_lshl_b32 s0, s0, 15
678-
; GFX9-NEXT: ; return to shader part epilog
650+
; GCN-LABEL: s_shl_i16_15:
651+
; GCN: ; %bb.0:
652+
; GCN-NEXT: s_lshl_b32 s0, s0, 15
653+
; GCN-NEXT: ; return to shader part epilog
679654
;
680655
; GFX10PLUS-LABEL: s_shl_i16_15:
681656
; GFX10PLUS: ; %bb.0:
682-
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
683657
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
684658
; GFX10PLUS-NEXT: ; return to shader part epilog
685659
%result = shl i16 %value, 15

0 commit comments

Comments
 (0)