Skip to content

Commit 14d006c

Browse files
AMDGPU/GlobalISel: Run redundant_and combine in RegBankCombiner (#112353)
The combine is needed to remove the redundant ANDs with 1 that reg-bank-select creates to clean up the high bits of a register. Also fix replaceRegWith in CombinerHelper: if a copy has to be inserted, create the copy first and then delete MI. If MI is deleted first, the insert point is no longer valid.
1 parent e1d205a commit 14d006c

File tree

6 files changed

+125
-100
lines changed

6 files changed

+125
-100
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
178178
if (MRI.constrainRegAttrs(ToReg, FromReg))
179179
MRI.replaceRegWith(FromReg, ToReg);
180180
else
181-
Builder.buildCopy(ToReg, FromReg);
181+
Builder.buildCopy(FromReg, ToReg);
182182

183183
Observer.finishedChangingAllUsesOfReg();
184184
}
@@ -229,8 +229,8 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
229229
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
230230
Register DstReg = MI.getOperand(0).getReg();
231231
Register SrcReg = MI.getOperand(1).getReg();
232-
MI.eraseFromParent();
233232
replaceRegWith(MRI, DstReg, SrcReg);
233+
MI.eraseFromParent();
234234
}
235235

236236
bool CombinerHelper::matchFreezeOfSingleMaybePoisonOperand(
@@ -379,8 +379,8 @@ void CombinerHelper::applyCombineConcatVectors(MachineInstr &MI,
379379
Builder.buildUndef(NewDstReg);
380380
else
381381
Builder.buildBuildVector(NewDstReg, Ops);
382-
MI.eraseFromParent();
383382
replaceRegWith(MRI, DstReg, NewDstReg);
383+
MI.eraseFromParent();
384384
}
385385

386386
bool CombinerHelper::matchCombineShuffleConcat(MachineInstr &MI,
@@ -559,8 +559,8 @@ void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
559559
else
560560
Builder.buildMergeLikeInstr(NewDstReg, Ops);
561561

562-
MI.eraseFromParent();
563562
replaceRegWith(MRI, DstReg, NewDstReg);
563+
MI.eraseFromParent();
564564
}
565565

566566
bool CombinerHelper::matchShuffleToExtract(MachineInstr &MI) {
@@ -2825,17 +2825,17 @@ void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
28252825
Register OldReg = MI.getOperand(0).getReg();
28262826
Register Replacement = MI.getOperand(OpIdx).getReg();
28272827
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2828-
MI.eraseFromParent();
28292828
replaceRegWith(MRI, OldReg, Replacement);
2829+
MI.eraseFromParent();
28302830
}
28312831

28322832
void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
28332833
Register Replacement) {
28342834
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
28352835
Register OldReg = MI.getOperand(0).getReg();
28362836
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2837-
MI.eraseFromParent();
28382837
replaceRegWith(MRI, OldReg, Replacement);
2838+
MI.eraseFromParent();
28392839
}
28402840

28412841
bool CombinerHelper::matchConstantLargerBitWidth(MachineInstr &MI,

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,5 +169,6 @@ def AMDGPURegBankCombiner : GICombiner<
169169
"AMDGPURegBankCombinerImpl",
170170
[unmerge_merge, unmerge_cst, unmerge_undef,
171171
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
172-
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
172+
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
173+
redundant_and]> {
173174
}

llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-asserts.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,8 @@ define hidden <2 x i64> @icmp_v2i32_zext_to_v2i64(<2 x i32> %arg) {
2727
; CHECK-NEXT: v_mov_b32_e32 v3, 0
2828
; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
2929
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
30-
; CHECK-NEXT: v_and_b32_e32 v0, 1, v0
31-
; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
32-
; CHECK-NEXT: v_and_b32_e32 v2, 1, v1
3330
; CHECK-NEXT: v_mov_b32_e32 v1, 0
31+
; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3432
; CHECK-NEXT: s_setpc_b64 s[30:31]
3533
%cmp = icmp eq <2 x i32> %arg, zeroinitializer
3634
%sext = zext <2 x i1> %cmp to <2 x i64>
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
4+
---
5+
name: replaceRegWith_requires_copy
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0, $vgpr0_vgpr1
10+
11+
; CHECK-LABEL: name: replaceRegWith_requires_copy
12+
; CHECK: liveins: $sgpr0, $vgpr0_vgpr1
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(p1) = COPY $vgpr0_vgpr1
15+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
16+
; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1
17+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:sreg_32(s32) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]]
18+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY [[ICMP]](s32)
19+
; CHECK-NEXT: G_STORE [[COPY2]](s32), [[COPY]](p1) :: (store (s32), addrspace 1)
20+
; CHECK-NEXT: S_ENDPGM 0
21+
%0:sgpr(p1) = COPY $vgpr0_vgpr1
22+
%1:sgpr(s32) = COPY $sgpr0
23+
%2:sgpr(s32) = G_CONSTANT i32 1
24+
%3:sreg_32(s32) = G_ICMP intpred(ne), %1, %2
25+
%4:sgpr(s32) = G_AND %3, %2
26+
G_STORE %4(s32), %0(p1) :: (store (s32), addrspace 1)
27+
S_ENDPGM 0
28+
...

llvm/test/CodeGen/AMDGPU/fptoi.i128.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,12 @@ define i128 @fptosi_f64_to_i128(double %x) {
136136
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137137
; GISEL-NEXT: v_mov_b32_e32 v5, v1
138138
; GISEL-NEXT: v_mov_b32_e32 v4, v0
139-
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v5
140-
; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v0
139+
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
141140
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
142141
; GISEL-NEXT: s_mov_b64 s[4:5], 0
143-
; GISEL-NEXT: v_mov_b32_e32 v1, 0
144142
; GISEL-NEXT: v_mov_b32_e32 v7, 0
143+
; GISEL-NEXT: v_mov_b32_e32 v1, 0
144+
; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
145145
; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
146146
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
147147
; GISEL-NEXT: v_mov_b32_e32 v0, s4
@@ -508,12 +508,12 @@ define i128 @fptoui_f64_to_i128(double %x) {
508508
; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509509
; GISEL-NEXT: v_mov_b32_e32 v5, v1
510510
; GISEL-NEXT: v_mov_b32_e32 v4, v0
511-
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 20, v5
512-
; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v0
511+
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 20, v5
513512
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff
514513
; GISEL-NEXT: s_mov_b64 s[4:5], 0
515-
; GISEL-NEXT: v_mov_b32_e32 v1, 0
516514
; GISEL-NEXT: v_mov_b32_e32 v7, 0
515+
; GISEL-NEXT: v_mov_b32_e32 v1, 0
516+
; GISEL-NEXT: v_and_b32_e32 v6, 0x7ff, v2
517517
; GISEL-NEXT: v_cmp_ge_u64_e32 vcc, v[6:7], v[0:1]
518518
; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
519519
; GISEL-NEXT: v_mov_b32_e32 v0, s4

llvm/test/CodeGen/AMDGPU/itofp.i128.ll

Lines changed: 82 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -673,38 +673,38 @@ define double @sitofp_i128_to_f64(i128 %x) {
673673
; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v3
674674
; GISEL-NEXT: v_xor_b32_e32 v0, v6, v4
675675
; GISEL-NEXT: v_xor_b32_e32 v1, v6, v5
676-
; GISEL-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v6
677-
; GISEL-NEXT: v_xor_b32_e32 v2, v6, v2
678-
; GISEL-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v6, vcc
679-
; GISEL-NEXT: v_xor_b32_e32 v3, v6, v3
680-
; GISEL-NEXT: v_subb_co_u32_e32 v2, vcc, v2, v6, vcc
681-
; GISEL-NEXT: v_ffbh_u32_e32 v5, v0
682-
; GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v3, v6, vcc
683-
; GISEL-NEXT: v_ffbh_u32_e32 v4, v1
684-
; GISEL-NEXT: v_add_u32_e32 v5, 32, v5
685-
; GISEL-NEXT: v_ffbh_u32_e32 v7, v2
686-
; GISEL-NEXT: v_min_u32_e32 v4, v4, v5
687-
; GISEL-NEXT: v_ffbh_u32_e32 v5, v3
676+
; GISEL-NEXT: v_xor_b32_e32 v4, v6, v2
677+
; GISEL-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
678+
; GISEL-NEXT: v_xor_b32_e32 v5, v6, v3
679+
; GISEL-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v6, vcc
680+
; GISEL-NEXT: v_subb_co_u32_e32 v4, vcc, v4, v6, vcc
681+
; GISEL-NEXT: v_ffbh_u32_e32 v1, v2
682+
; GISEL-NEXT: v_subb_co_u32_e32 v5, vcc, v5, v6, vcc
683+
; GISEL-NEXT: v_ffbh_u32_e32 v0, v3
684+
; GISEL-NEXT: v_add_u32_e32 v1, 32, v1
685+
; GISEL-NEXT: v_ffbh_u32_e32 v7, v4
686+
; GISEL-NEXT: v_min_u32_e32 v0, v0, v1
687+
; GISEL-NEXT: v_ffbh_u32_e32 v1, v5
688688
; GISEL-NEXT: v_add_u32_e32 v7, 32, v7
689-
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
690-
; GISEL-NEXT: v_add_u32_e32 v4, 64, v4
691-
; GISEL-NEXT: v_min_u32_e32 v5, v5, v7
692-
; GISEL-NEXT: v_cndmask_b32_e32 v9, v5, v4, vcc
689+
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
690+
; GISEL-NEXT: v_add_u32_e32 v0, 64, v0
691+
; GISEL-NEXT: v_min_u32_e32 v1, v1, v7
692+
; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v0, vcc
693693
; GISEL-NEXT: v_sub_u32_e32 v8, 0x80, v9
694694
; GISEL-NEXT: v_sub_u32_e32 v7, 0x7f, v9
695695
; GISEL-NEXT: v_cmp_ge_i32_e32 vcc, 53, v8
696696
; GISEL-NEXT: ; implicit-def: $vgpr10
697-
; GISEL-NEXT: ; implicit-def: $vgpr4_vgpr5
697+
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
698698
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
699699
; GISEL-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
700700
; GISEL-NEXT: ; %bb.2: ; %itofp-if-else
701-
; GISEL-NEXT: v_add_u32_e32 v2, 0xffffffb5, v9
702-
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
703-
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v2
704-
; GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v0, vcc
701+
; GISEL-NEXT: v_add_u32_e32 v4, 0xffffffb5, v9
702+
; GISEL-NEXT: v_lshlrev_b64 v[0:1], v4, v[2:3]
703+
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4
704+
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
705705
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
706706
; GISEL-NEXT: ; implicit-def: $vgpr8
707-
; GISEL-NEXT: ; implicit-def: $vgpr0
707+
; GISEL-NEXT: ; implicit-def: $vgpr2
708708
; GISEL-NEXT: ; implicit-def: $vgpr9
709709
; GISEL-NEXT: ; %bb.3: ; %Flow3
710710
; GISEL-NEXT: s_andn2_saveexec_b64 s[8:9], s[4:5]
@@ -721,89 +721,88 @@ define double @sitofp_i128_to_f64(i128 %x) {
721721
; GISEL-NEXT: ; %bb.6: ; %itofp-sw-default
722722
; GISEL-NEXT: v_sub_u32_e32 v14, 0x49, v9
723723
; GISEL-NEXT: v_sub_u32_e32 v10, 64, v14
724-
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1]
725-
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[2:3]
724+
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v14, v[2:3]
725+
; GISEL-NEXT: v_lshlrev_b64 v[10:11], v10, v[4:5]
726726
; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
727-
; GISEL-NEXT: v_or_b32_e32 v10, v4, v10
728-
; GISEL-NEXT: v_or_b32_e32 v11, v5, v11
729-
; GISEL-NEXT: v_lshrrev_b64 v[4:5], v15, v[2:3]
730-
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[2:3]
727+
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v14, v[4:5]
728+
; GISEL-NEXT: v_or_b32_e32 v10, v0, v10
729+
; GISEL-NEXT: v_or_b32_e32 v11, v1, v11
730+
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v15, v[4:5]
731731
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
732+
; GISEL-NEXT: v_add_u32_e32 v9, 55, v9
733+
; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc
734+
; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc
732735
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
733-
; GISEL-NEXT: v_add_u32_e32 v14, 55, v9
734-
; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc
735-
; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc
736-
; GISEL-NEXT: v_sub_u32_e32 v11, 64, v14
737-
; GISEL-NEXT: v_cndmask_b32_e64 v13, v4, v0, s[4:5]
738-
; GISEL-NEXT: v_cndmask_b32_e64 v4, v5, v1, s[4:5]
739-
; GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v12, vcc
740-
; GISEL-NEXT: v_lshrrev_b64 v[9:10], v14, -1
741-
; GISEL-NEXT: v_lshlrev_b64 v[11:12], v11, -1
742-
; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v14
743-
; GISEL-NEXT: v_or_b32_e32 v16, v9, v11
744-
; GISEL-NEXT: v_or_b32_e32 v17, v10, v12
745-
; GISEL-NEXT: v_lshrrev_b64 v[11:12], v15, -1
746-
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14
747-
; GISEL-NEXT: v_cndmask_b32_e32 v11, v11, v16, vcc
748-
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v17, vcc
749-
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14
750-
; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v9, vcc
751-
; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v10, vcc
752-
; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, -1, s[4:5]
753-
; GISEL-NEXT: v_cndmask_b32_e64 v12, v12, -1, s[4:5]
754-
; GISEL-NEXT: v_and_b32_e32 v2, v9, v2
755-
; GISEL-NEXT: v_and_b32_e32 v3, v10, v3
756-
; GISEL-NEXT: v_and_or_b32 v0, v11, v0, v2
757-
; GISEL-NEXT: v_and_or_b32 v1, v12, v1, v3
736+
; GISEL-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc
737+
; GISEL-NEXT: v_sub_u32_e32 v12, 64, v9
738+
; GISEL-NEXT: v_cndmask_b32_e64 v14, v0, v2, s[4:5]
739+
; GISEL-NEXT: v_cndmask_b32_e64 v10, v1, v3, s[4:5]
740+
; GISEL-NEXT: v_lshrrev_b64 v[0:1], v9, -1
741+
; GISEL-NEXT: v_lshlrev_b64 v[12:13], v12, -1
742+
; GISEL-NEXT: v_subrev_u32_e32 v15, 64, v9
743+
; GISEL-NEXT: v_or_b32_e32 v16, v0, v12
744+
; GISEL-NEXT: v_or_b32_e32 v17, v1, v13
745+
; GISEL-NEXT: v_lshrrev_b64 v[12:13], v15, -1
746+
; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v9
747+
; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v16, vcc
748+
; GISEL-NEXT: v_cndmask_b32_e32 v13, v13, v17, vcc
749+
; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v9
750+
; GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
751+
; GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
752+
; GISEL-NEXT: v_cndmask_b32_e64 v9, v12, -1, s[4:5]
753+
; GISEL-NEXT: v_cndmask_b32_e64 v12, v13, -1, s[4:5]
754+
; GISEL-NEXT: v_and_b32_e32 v0, v0, v4
755+
; GISEL-NEXT: v_and_b32_e32 v1, v1, v5
756+
; GISEL-NEXT: v_and_or_b32 v0, v9, v2, v0
757+
; GISEL-NEXT: v_and_or_b32 v1, v12, v3, v1
758758
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
759759
; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
760-
; GISEL-NEXT: v_or_b32_e32 v3, v13, v0
761-
; GISEL-NEXT: v_mov_b32_e32 v0, v3
762-
; GISEL-NEXT: v_mov_b32_e32 v1, v4
763-
; GISEL-NEXT: v_mov_b32_e32 v2, v5
764-
; GISEL-NEXT: v_mov_b32_e32 v3, v6
760+
; GISEL-NEXT: v_or_b32_e32 v9, v14, v0
761+
; GISEL-NEXT: v_mov_b32_e32 v2, v9
762+
; GISEL-NEXT: v_mov_b32_e32 v3, v10
763+
; GISEL-NEXT: v_mov_b32_e32 v4, v11
764+
; GISEL-NEXT: v_mov_b32_e32 v5, v12
765765
; GISEL-NEXT: .LBB2_7: ; %Flow1
766766
; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
767767
; GISEL-NEXT: .LBB2_8: ; %Flow2
768768
; GISEL-NEXT: s_andn2_saveexec_b64 s[4:5], s[10:11]
769769
; GISEL-NEXT: s_cbranch_execz .LBB2_10
770770
; GISEL-NEXT: ; %bb.9: ; %itofp-sw-bb
771-
; GISEL-NEXT: v_lshlrev_b64 v[9:10], 1, v[0:1]
772-
; GISEL-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3]
773-
; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v1
774-
; GISEL-NEXT: v_or_b32_e32 v11, v2, v0
775-
; GISEL-NEXT: v_mov_b32_e32 v0, v9
776-
; GISEL-NEXT: v_mov_b32_e32 v1, v10
777-
; GISEL-NEXT: v_mov_b32_e32 v2, v11
778-
; GISEL-NEXT: v_mov_b32_e32 v3, v12
771+
; GISEL-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5]
772+
; GISEL-NEXT: v_lshlrev_b64 v[0:1], 1, v[2:3]
773+
; GISEL-NEXT: v_lshrrev_b32_e32 v2, 31, v3
774+
; GISEL-NEXT: v_or_b32_e32 v2, v4, v2
775+
; GISEL-NEXT: v_mov_b32_e32 v5, v3
776+
; GISEL-NEXT: v_mov_b32_e32 v4, v2
777+
; GISEL-NEXT: v_mov_b32_e32 v3, v1
778+
; GISEL-NEXT: v_mov_b32_e32 v2, v0
779779
; GISEL-NEXT: .LBB2_10: ; %itofp-sw-epilog
780780
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
781-
; GISEL-NEXT: v_bfe_u32 v3, v0, 2, 1
782-
; GISEL-NEXT: v_or_b32_e32 v0, v0, v3
783-
; GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
784-
; GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
785-
; GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
786-
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 2, v[0:1]
781+
; GISEL-NEXT: v_bfe_u32 v0, v2, 2, 1
782+
; GISEL-NEXT: v_or_b32_e32 v0, v2, v0
783+
; GISEL-NEXT: v_add_co_u32_e32 v2, vcc, 1, v0
784+
; GISEL-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
785+
; GISEL-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
786+
; GISEL-NEXT: v_lshrrev_b64 v[0:1], 2, v[2:3]
787787
; GISEL-NEXT: v_mov_b32_e32 v9, 0
788-
; GISEL-NEXT: v_and_b32_e32 v10, 0x800000, v1
788+
; GISEL-NEXT: v_and_b32_e32 v10, 0x800000, v3
789789
; GISEL-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[9:10]
790-
; GISEL-NEXT: v_lshl_or_b32 v10, v2, 30, v5
790+
; GISEL-NEXT: v_lshl_or_b32 v10, v4, 30, v1
791791
; GISEL-NEXT: s_and_saveexec_b64 s[4:5], vcc
792792
; GISEL-NEXT: ; %bb.11: ; %itofp-if-then20
793-
; GISEL-NEXT: v_lshrrev_b64 v[4:5], 3, v[0:1]
793+
; GISEL-NEXT: v_lshrrev_b64 v[0:1], 3, v[2:3]
794794
; GISEL-NEXT: v_mov_b32_e32 v7, v8
795-
; GISEL-NEXT: v_lshl_or_b32 v10, v2, 29, v5
795+
; GISEL-NEXT: v_lshl_or_b32 v10, v4, 29, v1
796796
; GISEL-NEXT: ; %bb.12: ; %Flow
797797
; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
798798
; GISEL-NEXT: .LBB2_13: ; %Flow4
799799
; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
800-
; GISEL-NEXT: v_and_b32_e32 v0, 0x80000000, v6
801-
; GISEL-NEXT: v_mov_b32_e32 v1, 0x3ff00000
802-
; GISEL-NEXT: v_mov_b32_e32 v2, 0xfffff
803-
; GISEL-NEXT: v_lshl_add_u32 v1, v7, 20, v1
804-
; GISEL-NEXT: v_and_or_b32 v2, v10, v2, v0
805-
; GISEL-NEXT: v_and_or_b32 v0, v4, -1, 0
806-
; GISEL-NEXT: v_or3_b32 v1, v2, v1, 0
800+
; GISEL-NEXT: v_and_b32_e32 v1, 0x80000000, v6
801+
; GISEL-NEXT: v_mov_b32_e32 v2, 0x3ff00000
802+
; GISEL-NEXT: v_mov_b32_e32 v3, 0xfffff
803+
; GISEL-NEXT: v_lshl_add_u32 v2, v7, 20, v2
804+
; GISEL-NEXT: v_and_or_b32 v1, v10, v3, v1
805+
; GISEL-NEXT: v_or3_b32 v1, v1, v2, 0
807806
; GISEL-NEXT: .LBB2_14: ; %Flow5
808807
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
809808
; GISEL-NEXT: s_setpc_b64 s[30:31]
@@ -1083,7 +1082,6 @@ define double @uitofp_i128_to_f64(i128 %x) {
10831082
; GISEL-NEXT: v_mov_b32_e32 v0, 0x3ff00000
10841083
; GISEL-NEXT: v_lshl_add_u32 v0, v6, 20, v0
10851084
; GISEL-NEXT: v_and_b32_e32 v1, 0xfffff, v9
1086-
; GISEL-NEXT: v_and_or_b32 v4, v4, -1, 0
10871085
; GISEL-NEXT: v_or3_b32 v5, v1, v0, 0
10881086
; GISEL-NEXT: .LBB3_14: ; %Flow5
10891087
; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]

0 commit comments

Comments
 (0)