Skip to content

Commit e4d221e

Browse files
committed
[GISel][CombinerHelper] Combine and(trunc(x), trunc(y)) -> trunc(and(x, y))
1 parent 26cff6e commit e4d221e

File tree

11 files changed

+225
-246
lines changed

11 files changed

+225
-246
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3060,6 +3060,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
30603060
MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
30613061
if (!LeftHandInst || !RightHandInst)
30623062
return false;
3063+
30633064
unsigned HandOpcode = LeftHandInst->getOpcode();
30643065
if (HandOpcode != RightHandInst->getOpcode())
30653066
return false;
@@ -3083,8 +3084,10 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
30833084
return false;
30843085
case TargetOpcode::G_ANYEXT:
30853086
case TargetOpcode::G_SEXT:
3086-
case TargetOpcode::G_ZEXT: {
3087+
case TargetOpcode::G_ZEXT:
3088+
case TargetOpcode::G_TRUNC: {
30873089
// Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3090+
// Match: logic (trunc X), (trunc Y) -> trunc (logic X, Y)
30883091
break;
30893092
}
30903093
case TargetOpcode::G_AND:

llvm/test/CodeGen/AArch64/GlobalISel/combine-and-trunc.mir

Lines changed: 5 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,8 @@ body: |
1111
; CHECK-NEXT: {{ $}}
1212
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
1313
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
14-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
15-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
16-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[TRUNC1]]
17-
; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[AND]](s16)
18-
; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
14+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[COPY1]]
15+
; CHECK-NEXT: $w0 = COPY [[AND]](s32)
1916
%0:_(s32) = COPY $w0
2017
%1:_(s32) = COPY $w1
2118
%2:_(s16) = G_TRUNC %0
@@ -88,10 +85,9 @@ body: |
8885
; CHECK-NEXT: {{ $}}
8986
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
9087
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
91-
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY]](<4 x s32>)
92-
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[COPY1]](<4 x s32>)
93-
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC]], [[TRUNC1]]
94-
; CHECK-NEXT: $x0 = COPY [[AND]](<4 x s16>)
88+
; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[COPY]], [[COPY1]]
89+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[AND]](<4 x s32>)
90+
; CHECK-NEXT: $x0 = COPY [[TRUNC]](<4 x s16>)
9591
%0:_(<4 x s32>) = COPY $q0
9692
%1:_(<4 x s32>) = COPY $q1
9793
%2:_(<4 x s16>) = G_TRUNC %0

llvm/test/CodeGen/AArch64/GlobalISel/combine-select.mir

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,8 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2-
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown- --aarch64postlegalizercombiner-only-enable-rule="select_to_logical" %s -o - | FileCheck %s
2+
# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
33
# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
44
# REQUIRES: asserts
5+
56
---
67
# select (c, x, x) -> x
78
name: test_combine_select_same_res

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-narrow-binop-feeding-add.mir

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,9 @@ body: |
8484
; CHECK: liveins: $x0, $x1
8585
; CHECK: %binop_lhs:_(s64) = COPY $x0
8686
; CHECK: %binop_rhs:_(s64) = COPY $x1
87-
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
88-
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
89-
; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[TRUNC1]]
90-
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[AND]](s32)
87+
; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND %binop_lhs, %binop_rhs
88+
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
89+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
9190
; CHECK: $x0 = COPY [[ZEXT]](s64)
9291
; CHECK: RET_ReallyLR implicit $x0
9392
%binop_lhs:_(s64) = COPY $x0
@@ -131,10 +130,9 @@ body: |
131130
; CHECK: liveins: $x0, $x1
132131
; CHECK: %binop_lhs:_(s64) = COPY $x0
133132
; CHECK: %binop_rhs:_(s64) = COPY $x1
134-
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %binop_lhs(s64)
135-
; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %binop_rhs(s64)
136-
; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]]
137-
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[XOR]](s32)
133+
; CHECK: [[XOR:%[0-9]+]]:_(s64) = G_XOR %binop_lhs, %binop_rhs
134+
; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[XOR]](s64)
135+
; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s32)
138136
; CHECK: $x0 = COPY [[ZEXT]](s64)
139137
; CHECK: RET_ReallyLR implicit $x0
140138
%binop_lhs:_(s64) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-hoist-same-hands.mir

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -268,10 +268,9 @@ body: |
268268
; CHECK: liveins: $w0, $w1
269269
; CHECK: %x_wide:_(s32) = COPY $w0
270270
; CHECK: %y_wide:_(s32) = COPY $w1
271-
; CHECK: %x:_(s1) = G_TRUNC %x_wide(s32)
272-
; CHECK: %y:_(s1) = G_TRUNC %y_wide(s32)
273-
; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR %x, %y
274-
; CHECK: %logic_op:_(s64) = G_SEXT [[OR]](s1)
271+
; CHECK: %8:_(s32) = G_OR %x_wide, %y_wide
272+
; CHECK: %7:_(s1) = G_TRUNC %8(s32)
273+
; CHECK: %logic_op:_(s64) = G_SEXT %7(s1)
275274
; CHECK: $x0 = COPY %logic_op(s64)
276275
; CHECK: RET_ReallyLR implicit $x0
277276
%x_wide:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/pr58431.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
define i32 @f(i64 %0) {
55
; CHECK-LABEL: f:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: mov w8, #10
7+
; CHECK-NEXT: mov w8, #10 // =0xa
88
; CHECK-NEXT: mov w9, w0
99
; CHECK-NEXT: udiv x10, x9, x8
1010
; CHECK-NEXT: msub x0, x10, x8, x9

llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll

Lines changed: 94 additions & 97 deletions
Original file line number | Diff line number | Diff line change
@@ -1804,113 +1804,110 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
18041804
define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) {
18051805
; GFX6-LABEL: s_fshl_v2i24:
18061806
; GFX6: ; %bb.0:
1807+
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24
1808+
; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2
18071809
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
1808-
; GFX6-NEXT: s_lshr_b32 s7, s0, 24
1809-
; GFX6-NEXT: s_and_b32 s9, s0, 0xff
1810-
; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008
1811-
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
1812-
; GFX6-NEXT: s_lshl_b32 s0, s0, 8
1810+
; GFX6-NEXT: s_lshr_b32 s7, s1, 8
1811+
; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008
1812+
; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1813+
; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
1814+
; GFX6-NEXT: s_and_b32 s8, s0, 0xff
1815+
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
18131816
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1814-
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
1815-
; GFX6-NEXT: s_or_b32 s0, s9, s0
1817+
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1818+
; GFX6-NEXT: v_mov_b32_e32 v0, s0
1819+
; GFX6-NEXT: s_and_b32 s0, s7, 0xff
1820+
; GFX6-NEXT: s_or_b32 s8, s8, s9
18161821
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1817-
; GFX6-NEXT: s_lshr_b32 s8, s1, 8
1822+
; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24
18181823
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1824+
; GFX6-NEXT: s_and_b32 s8, 0xffff, s8
18191825
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1826+
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
1827+
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1828+
; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
1829+
; GFX6-NEXT: s_or_b32 s6, s8, s6
1830+
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
1831+
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
1832+
; GFX6-NEXT: s_lshr_b32 s1, s3, 8
1833+
; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008
1834+
; GFX6-NEXT: v_mul_lo_u32 v3, v2, v3
1835+
; GFX6-NEXT: s_and_b32 s7, s2, 0xff
1836+
; GFX6-NEXT: s_lshl_b32 s8, s8, 8
1837+
; GFX6-NEXT: s_and_b32 s0, s0, 0xff
1838+
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1839+
; GFX6-NEXT: v_mov_b32_e32 v1, s2
18201840
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1821-
; GFX6-NEXT: s_or_b32 s0, s0, s6
1822-
; GFX6-NEXT: s_lshl_b32 s1, s1, 8
1823-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1824-
; GFX6-NEXT: s_or_b32 s1, s7, s1
1825-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1826-
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1841+
; GFX6-NEXT: s_or_b32 s7, s7, s8
1842+
; GFX6-NEXT: s_and_b32 s0, 0xffff, s0
1843+
; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24
1844+
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1845+
; GFX6-NEXT: s_and_b32 s7, 0xffff, s7
1846+
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
1847+
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
1848+
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1849+
; GFX6-NEXT: s_or_b32 s0, s7, s0
1850+
; GFX6-NEXT: v_or_b32_e32 v1, s1, v1
1851+
; GFX6-NEXT: s_lshr_b32 s1, s4, 16
1852+
; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008
1853+
; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3
1854+
; GFX6-NEXT: s_and_b32 s3, s4, 0xff
1855+
; GFX6-NEXT: s_lshl_b32 s7, s7, 8
1856+
; GFX6-NEXT: s_and_b32 s1, s1, 0xff
1857+
; GFX6-NEXT: s_or_b32 s3, s3, s7
18271858
; GFX6-NEXT: s_and_b32 s1, 0xffff, s1
1828-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1829-
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
1830-
; GFX6-NEXT: s_or_b32 s1, s1, s6
1831-
; GFX6-NEXT: s_lshr_b32 s6, s2, 16
1832-
; GFX6-NEXT: s_lshr_b32 s7, s2, 24
1833-
; GFX6-NEXT: s_and_b32 s9, s2, 0xff
1834-
; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008
1835-
; GFX6-NEXT: s_lshl_b32 s2, s2, 8
1836-
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1837-
; GFX6-NEXT: s_or_b32 s2, s9, s2
1838-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1839-
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
1840-
; GFX6-NEXT: s_lshr_b32 s8, s3, 8
1841-
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1842-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1843-
; GFX6-NEXT: s_and_b32 s3, s3, 0xff
1844-
; GFX6-NEXT: v_mul_lo_u32 v1, v0, v1
1845-
; GFX6-NEXT: s_or_b32 s2, s2, s6
1846-
; GFX6-NEXT: s_lshl_b32 s3, s3, 8
1847-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1848-
; GFX6-NEXT: s_or_b32 s3, s7, s3
1849-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
18501859
; GFX6-NEXT: s_and_b32 s3, 0xffff, s3
1851-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1852-
; GFX6-NEXT: s_or_b32 s3, s3, s6
1853-
; GFX6-NEXT: s_lshr_b32 s6, s4, 16
1854-
; GFX6-NEXT: s_lshr_b32 s7, s4, 24
1855-
; GFX6-NEXT: s_and_b32 s9, s4, 0xff
1856-
; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008
1857-
; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1
1858-
; GFX6-NEXT: s_lshl_b32 s4, s4, 8
1859-
; GFX6-NEXT: s_and_b32 s6, s6, 0xff
1860-
; GFX6-NEXT: s_or_b32 s4, s9, s4
1861-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1862-
; GFX6-NEXT: s_and_b32 s4, 0xffff, s4
1863-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1864-
; GFX6-NEXT: s_or_b32 s4, s4, s6
1865-
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1866-
; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0
1867-
; GFX6-NEXT: s_lshr_b32 s8, s5, 8
1868-
; GFX6-NEXT: s_and_b32 s5, s5, 0xff
1869-
; GFX6-NEXT: s_lshl_b32 s5, s5, 8
1870-
; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24
1871-
; GFX6-NEXT: s_and_b32 s6, s8, 0xff
1872-
; GFX6-NEXT: s_or_b32 s5, s7, s5
1873-
; GFX6-NEXT: s_and_b32 s6, 0xffff, s6
1874-
; GFX6-NEXT: s_and_b32 s5, 0xffff, s5
1875-
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
1876-
; GFX6-NEXT: s_or_b32 s5, s5, s6
1877-
; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s4, v1
1878-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1879-
; GFX6-NEXT: v_mul_hi_u32 v0, s5, v0
1880-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1881-
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1882-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1
1883-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1
1884-
; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24
1885-
; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1886-
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1
1887-
; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1
1888-
; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1
1889-
; GFX6-NEXT: s_lshr_b32 s0, s2, 1
1860+
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
1861+
; GFX6-NEXT: s_or_b32 s1, s3, s1
1862+
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1863+
; GFX6-NEXT: v_mul_hi_u32 v3, s1, v2
1864+
; GFX6-NEXT: s_lshr_b32 s2, s5, 8
1865+
; GFX6-NEXT: s_and_b32 s3, s5, 0xff
1866+
; GFX6-NEXT: v_mov_b32_e32 v4, s4
1867+
; GFX6-NEXT: s_and_b32 s2, s2, 0xff
1868+
; GFX6-NEXT: v_alignbit_b32 v4, s3, v4, 24
1869+
; GFX6-NEXT: s_and_b32 s2, 0xffff, s2
1870+
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
1871+
; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24
1872+
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
1873+
; GFX6-NEXT: v_or_b32_e32 v4, s2, v4
1874+
; GFX6-NEXT: v_mul_hi_u32 v2, v4, v2
1875+
; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s1, v3
1876+
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1877+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1878+
; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24
1879+
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1880+
; GFX6-NEXT: v_subrev_i32_e32 v5, vcc, 24, v3
1881+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v3
1882+
; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1883+
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v4, v2
1884+
; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 23, v3
1885+
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1886+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1887+
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1888+
; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2
1889+
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2
1890+
; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3
1891+
; GFX6-NEXT: s_lshr_b32 s0, s0, 1
1892+
; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
1893+
; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1894+
; GFX6-NEXT: v_lshl_b32_e32 v3, s6, v3
1895+
; GFX6-NEXT: v_lshr_b32_e32 v5, s0, v5
1896+
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2
18901897
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1891-
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1892-
; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s5, v0
1898+
; GFX6-NEXT: v_or_b32_e32 v3, v3, v5
1899+
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
1900+
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1
1901+
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v4
1902+
; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1
1903+
; GFX6-NEXT: v_bfe_u32 v2, v3, 8, 8
1904+
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
1905+
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3
1906+
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2
1907+
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1908+
; GFX6-NEXT: v_bfe_u32 v2, v3, 16, 8
1909+
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
18931910
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
1894-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1895-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1896-
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1897-
; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v0
1898-
; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0
1899-
; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1900-
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v0
1901-
; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1902-
; GFX6-NEXT: s_lshr_b32 s0, s3, 1
1903-
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1904-
; GFX6-NEXT: v_lshl_b32_e32 v0, s1, v0
1905-
; GFX6-NEXT: v_lshr_b32_e32 v2, s0, v2
1906-
; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8
1907-
; GFX6-NEXT: v_or_b32_e32 v0, v0, v2
1908-
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1
1909-
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3
1910-
; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8
1911-
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
1912-
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1913-
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
19141911
; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0
19151912
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2
19161913
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2

0 commit comments

Comments
 (0)