Skip to content

Commit e91feee

Browse files
committed
[AMDGPU] Add ISD::FSHR -> ALIGNBIT support
This patch allows ISD::FSHR(i32) patterns to lower to ALIGNBIT instructions. This improves test coverage of ISD::FSHR matching: x86 has both FSHL and FSHR instructions, and we prefer FSHL by default.

Differential Revision: https://reviews.llvm.org/D76070
1 parent 9975dc3 commit e91feee

11 files changed

+153
-293
lines changed

llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,6 +333,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
333333
setOperationAction(ISD::SUBE, VT, Legal);
334334
}
335335

336+
// The hardware supports 32-bit FSHR, but not FSHL.
337+
setOperationAction(ISD::FSHR, MVT::i32, Legal);
338+
336339
// The hardware supports 32-bit ROTR, but not ROTL.
337340
setOperationAction(ISD::ROTL, MVT::i32, Expand);
338341
setOperationAction(ISD::ROTL, MVT::i64, Expand);

llvm/lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -736,6 +736,12 @@ multiclass BFEPattern <Instruction UBFE, Instruction SBFE, Instruction MOV> {
736736
>;
737737
}
738738

739+
// fshr pattern
740+
class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
741+
(fshr i32:$src0, i32:$src1, i32:$src2),
742+
(BIT_ALIGN $src0, $src1, $src2)
743+
>;
744+
739745
// rotr pattern
740746
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
741747
(rotr i32:$src0, i32:$src1),

llvm/lib/Target/AMDGPU/EvergreenInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -422,6 +422,7 @@ def MULADD_UINT24_eg : R600_3OP <0x10, "MULADD_UINT24",
422422
def : UMad24Pat<MULADD_UINT24_eg>;
423423

424424
def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
425+
def : FSHRPattern <BIT_ALIGN_INT_eg>;
425426
def : ROTRPattern <BIT_ALIGN_INT_eg>;
426427
def MULADD_eg : MULADD_Common<0x14>;
427428
def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1484,6 +1484,7 @@ def : UMad24Pat<V_MAD_U32_U24, 1>;
14841484

14851485
// FIXME: This should only be done for VALU inputs
14861486
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
1487+
def : FSHRPattern <V_ALIGNBIT_B32>;
14871488
def : ROTRPattern <V_ALIGNBIT_B32>;
14881489

14891490
def : GCNPat<(i32 (trunc (srl i64:$src0, (and i32:$src1, (i32 31))))),

llvm/test/CodeGen/AMDGPU/build-vector-packed-partial-undef.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -163,9 +163,8 @@ define void @undef_lo2_v4i16(<2 x i16> %arg0) {
163163
; GFX8-LABEL: undef_lo2_v4i16:
164164
; GFX8: ; %bb.0:
165165
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166-
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
167-
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
168-
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
166+
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
167+
; GFX8-NEXT: v_alignbit_b32 v0, v1, v0, 16
169168
; GFX8-NEXT: ;;#ASMSTART
170169
; GFX8-NEXT: ; use v[0:1]
171170
; GFX8-NEXT: ;;#ASMEND
@@ -190,9 +189,8 @@ define void @undef_lo2_v4f16(<2 x half> %arg0) {
190189
; GFX8-LABEL: undef_lo2_v4f16:
191190
; GFX8: ; %bb.0:
192191
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193-
; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0
194-
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v0
195-
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
192+
; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0
193+
; GFX8-NEXT: v_alignbit_b32 v0, v1, v0, 16
196194
; GFX8-NEXT: ;;#ASMSTART
197195
; GFX8-NEXT: ; use v[0:1]
198196
; GFX8-NEXT: ;;#ASMEND

llvm/test/CodeGen/AMDGPU/fshl.ll

Lines changed: 57 additions & 113 deletions
Original file line numberDiff line numberDiff line change
@@ -97,10 +97,8 @@ define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
9797
; SI-NEXT: s_mov_b32 s7, 0xf000
9898
; SI-NEXT: s_mov_b32 s6, -1
9999
; SI-NEXT: s_waitcnt lgkmcnt(0)
100-
; SI-NEXT: s_lshr_b32 s1, s1, 25
101-
; SI-NEXT: s_lshl_b32 s0, s0, 7
102-
; SI-NEXT: s_or_b32 s0, s0, s1
103-
; SI-NEXT: v_mov_b32_e32 v0, s0
100+
; SI-NEXT: v_mov_b32_e32 v0, s1
101+
; SI-NEXT: v_alignbit_b32 v0, s0, v0, 25
104102
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
105103
; SI-NEXT: s_endpgm
106104
;
@@ -109,12 +107,10 @@ define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
109107
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
110108
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
111109
; VI-NEXT: s_waitcnt lgkmcnt(0)
110+
; VI-NEXT: v_mov_b32_e32 v0, s1
111+
; VI-NEXT: v_alignbit_b32 v2, s0, v0, 25
112112
; VI-NEXT: v_mov_b32_e32 v0, s2
113-
; VI-NEXT: s_lshr_b32 s1, s1, 25
114-
; VI-NEXT: s_lshl_b32 s0, s0, 7
115-
; VI-NEXT: s_or_b32 s0, s0, s1
116113
; VI-NEXT: v_mov_b32_e32 v1, s3
117-
; VI-NEXT: v_mov_b32_e32 v2, s0
118114
; VI-NEXT: flat_store_dword v[0:1], v2
119115
; VI-NEXT: s_endpgm
120116
;
@@ -123,28 +119,24 @@ define amdgpu_kernel void @fshl_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) {
123119
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
124120
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c
125121
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
122+
; GFX9-NEXT: v_mov_b32_e32 v0, s1
123+
; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, 25
126124
; GFX9-NEXT: v_mov_b32_e32 v0, s2
127-
; GFX9-NEXT: s_lshr_b32 s1, s1, 25
128-
; GFX9-NEXT: s_lshl_b32 s0, s0, 7
129-
; GFX9-NEXT: s_or_b32 s0, s0, s1
130125
; GFX9-NEXT: v_mov_b32_e32 v1, s3
131-
; GFX9-NEXT: v_mov_b32_e32 v2, s0
132126
; GFX9-NEXT: global_store_dword v[0:1], v2, off
133127
; GFX9-NEXT: s_endpgm
134128
;
135129
; R600-LABEL: fshl_i32_imm:
136130
; R600: ; %bb.0: ; %entry
137-
; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
138-
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
131+
; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
132+
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
139133
; R600-NEXT: CF_END
140134
; R600-NEXT: PAD
141135
; R600-NEXT: ALU clause starting at 4:
142-
; R600-NEXT: LSHL T0.W, KC0[2].Z, literal.x,
143-
; R600-NEXT: LSHR * T1.W, KC0[2].W, literal.y,
144-
; R600-NEXT: 7(9.809089e-45), 25(3.503246e-44)
145-
; R600-NEXT: OR_INT T0.X, PV.W, PS,
146-
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
136+
; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
147137
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
138+
; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x,
139+
; R600-NEXT: 25(3.503246e-44), 0(0.000000e+00)
148140
entry:
149141
%0 = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
150142
store i32 %0, i32 addrspace(1)* %in
@@ -283,14 +275,10 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
283275
; SI-NEXT: s_mov_b32 s7, 0xf000
284276
; SI-NEXT: s_mov_b32 s6, -1
285277
; SI-NEXT: s_waitcnt lgkmcnt(0)
286-
; SI-NEXT: s_lshl_b32 s3, s3, 9
287-
; SI-NEXT: s_lshr_b32 s1, s1, 23
288-
; SI-NEXT: s_lshr_b32 s0, s0, 25
289-
; SI-NEXT: s_lshl_b32 s2, s2, 7
290-
; SI-NEXT: s_or_b32 s1, s3, s1
291-
; SI-NEXT: s_or_b32 s0, s2, s0
278+
; SI-NEXT: v_mov_b32_e32 v0, s1
279+
; SI-NEXT: v_alignbit_b32 v1, s3, v0, 23
292280
; SI-NEXT: v_mov_b32_e32 v0, s0
293-
; SI-NEXT: v_mov_b32_e32 v1, s1
281+
; SI-NEXT: v_alignbit_b32 v0, s2, v0, 25
294282
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
295283
; SI-NEXT: s_endpgm
296284
;
@@ -300,15 +288,11 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
300288
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
301289
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
302290
; VI-NEXT: s_waitcnt lgkmcnt(0)
291+
; VI-NEXT: v_mov_b32_e32 v0, s1
292+
; VI-NEXT: v_mov_b32_e32 v2, s0
293+
; VI-NEXT: v_alignbit_b32 v1, s5, v0, 23
294+
; VI-NEXT: v_alignbit_b32 v0, s4, v2, 25
303295
; VI-NEXT: v_mov_b32_e32 v2, s2
304-
; VI-NEXT: s_lshl_b32 s5, s5, 9
305-
; VI-NEXT: s_lshr_b32 s1, s1, 23
306-
; VI-NEXT: s_lshr_b32 s0, s0, 25
307-
; VI-NEXT: s_lshl_b32 s4, s4, 7
308-
; VI-NEXT: s_or_b32 s1, s5, s1
309-
; VI-NEXT: s_or_b32 s0, s4, s0
310-
; VI-NEXT: v_mov_b32_e32 v0, s0
311-
; VI-NEXT: v_mov_b32_e32 v1, s1
312296
; VI-NEXT: v_mov_b32_e32 v3, s3
313297
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
314298
; VI-NEXT: s_endpgm
@@ -319,34 +303,26 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
319303
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
320304
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
321305
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
306+
; GFX9-NEXT: v_mov_b32_e32 v0, s1
307+
; GFX9-NEXT: v_mov_b32_e32 v2, s0
308+
; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 23
309+
; GFX9-NEXT: v_alignbit_b32 v0, s4, v2, 25
322310
; GFX9-NEXT: v_mov_b32_e32 v2, s2
323-
; GFX9-NEXT: s_lshl_b32 s5, s5, 9
324-
; GFX9-NEXT: s_lshr_b32 s1, s1, 23
325-
; GFX9-NEXT: s_lshr_b32 s0, s0, 25
326-
; GFX9-NEXT: s_lshl_b32 s4, s4, 7
327-
; GFX9-NEXT: s_or_b32 s1, s5, s1
328-
; GFX9-NEXT: s_or_b32 s0, s4, s0
329-
; GFX9-NEXT: v_mov_b32_e32 v0, s0
330-
; GFX9-NEXT: v_mov_b32_e32 v1, s1
331311
; GFX9-NEXT: v_mov_b32_e32 v3, s3
332312
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
333313
; GFX9-NEXT: s_endpgm
334314
;
335315
; R600-LABEL: fshl_v2i32_imm:
336316
; R600: ; %bb.0: ; %entry
337-
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
317+
; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
338318
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
339319
; R600-NEXT: CF_END
340320
; R600-NEXT: PAD
341321
; R600-NEXT: ALU clause starting at 4:
342-
; R600-NEXT: LSHL T0.W, KC0[3].X, literal.x,
343-
; R600-NEXT: LSHR * T1.W, KC0[3].Z, literal.y,
344-
; R600-NEXT: 9(1.261169e-44), 23(3.222986e-44)
345-
; R600-NEXT: OR_INT T0.Y, PV.W, PS,
346-
; R600-NEXT: LSHL T0.W, KC0[2].W, literal.x,
347-
; R600-NEXT: LSHR * T1.W, KC0[3].Y, literal.y,
348-
; R600-NEXT: 7(9.809089e-45), 25(3.503246e-44)
349-
; R600-NEXT: OR_INT T0.X, PV.W, PS,
322+
; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x,
323+
; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
324+
; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x,
325+
; R600-NEXT: 25(3.503246e-44), 0(0.000000e+00)
350326
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
351327
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
352328
entry:
@@ -557,22 +533,14 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
557533
; SI-NEXT: s_mov_b32 s7, 0xf000
558534
; SI-NEXT: s_mov_b32 s6, -1
559535
; SI-NEXT: s_waitcnt lgkmcnt(0)
560-
; SI-NEXT: s_lshl_b32 s11, s11, 1
561-
; SI-NEXT: s_lshr_b32 s3, s3, 31
562-
; SI-NEXT: s_lshr_b32 s2, s2, 23
563-
; SI-NEXT: s_lshl_b32 s10, s10, 9
564-
; SI-NEXT: s_lshr_b32 s1, s1, 25
565-
; SI-NEXT: s_lshl_b32 s9, s9, 7
566-
; SI-NEXT: s_lshr_b32 s0, s0, 31
567-
; SI-NEXT: s_lshl_b32 s8, s8, 1
568-
; SI-NEXT: s_or_b32 s3, s11, s3
569-
; SI-NEXT: s_or_b32 s2, s10, s2
570-
; SI-NEXT: s_or_b32 s1, s9, s1
571-
; SI-NEXT: s_or_b32 s0, s8, s0
536+
; SI-NEXT: v_mov_b32_e32 v0, s3
537+
; SI-NEXT: v_alignbit_b32 v3, s11, v0, 31
538+
; SI-NEXT: v_mov_b32_e32 v0, s2
539+
; SI-NEXT: v_alignbit_b32 v2, s10, v0, 23
540+
; SI-NEXT: v_mov_b32_e32 v0, s1
541+
; SI-NEXT: v_alignbit_b32 v1, s9, v0, 25
572542
; SI-NEXT: v_mov_b32_e32 v0, s0
573-
; SI-NEXT: v_mov_b32_e32 v1, s1
574-
; SI-NEXT: v_mov_b32_e32 v2, s2
575-
; SI-NEXT: v_mov_b32_e32 v3, s3
543+
; SI-NEXT: v_alignbit_b32 v0, s8, v0, 31
576544
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
577545
; SI-NEXT: s_endpgm
578546
;
@@ -583,23 +551,15 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
583551
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44
584552
; VI-NEXT: s_waitcnt lgkmcnt(0)
585553
; VI-NEXT: v_mov_b32_e32 v4, s8
586-
; VI-NEXT: s_lshl_b32 s7, s7, 1
587-
; VI-NEXT: s_lshr_b32 s3, s3, 31
588-
; VI-NEXT: s_lshr_b32 s2, s2, 23
589-
; VI-NEXT: s_lshl_b32 s6, s6, 9
590-
; VI-NEXT: s_lshr_b32 s1, s1, 25
591-
; VI-NEXT: s_lshl_b32 s5, s5, 7
592-
; VI-NEXT: s_lshr_b32 s0, s0, 31
593-
; VI-NEXT: s_lshl_b32 s4, s4, 1
594-
; VI-NEXT: s_or_b32 s3, s7, s3
595-
; VI-NEXT: s_or_b32 s2, s6, s2
596-
; VI-NEXT: s_or_b32 s1, s5, s1
597-
; VI-NEXT: s_or_b32 s0, s4, s0
598-
; VI-NEXT: v_mov_b32_e32 v0, s0
599-
; VI-NEXT: v_mov_b32_e32 v1, s1
600-
; VI-NEXT: v_mov_b32_e32 v2, s2
601-
; VI-NEXT: v_mov_b32_e32 v3, s3
602554
; VI-NEXT: v_mov_b32_e32 v5, s9
555+
; VI-NEXT: v_mov_b32_e32 v0, s3
556+
; VI-NEXT: v_mov_b32_e32 v1, s2
557+
; VI-NEXT: v_alignbit_b32 v3, s7, v0, 31
558+
; VI-NEXT: v_mov_b32_e32 v0, s1
559+
; VI-NEXT: v_alignbit_b32 v2, s6, v1, 23
560+
; VI-NEXT: v_alignbit_b32 v1, s5, v0, 25
561+
; VI-NEXT: v_mov_b32_e32 v0, s0
562+
; VI-NEXT: v_alignbit_b32 v0, s4, v0, 31
603563
; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
604564
; VI-NEXT: s_endpgm
605565
;
@@ -610,49 +570,33 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
610570
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44
611571
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
612572
; GFX9-NEXT: v_mov_b32_e32 v4, s8
613-
; GFX9-NEXT: s_lshl_b32 s7, s7, 1
614-
; GFX9-NEXT: s_lshr_b32 s3, s3, 31
615-
; GFX9-NEXT: s_lshr_b32 s2, s2, 23
616-
; GFX9-NEXT: s_lshl_b32 s6, s6, 9
617-
; GFX9-NEXT: s_lshr_b32 s1, s1, 25
618-
; GFX9-NEXT: s_lshl_b32 s5, s5, 7
619-
; GFX9-NEXT: s_lshr_b32 s0, s0, 31
620-
; GFX9-NEXT: s_lshl_b32 s4, s4, 1
621-
; GFX9-NEXT: s_or_b32 s3, s7, s3
622-
; GFX9-NEXT: s_or_b32 s2, s6, s2
623-
; GFX9-NEXT: s_or_b32 s1, s5, s1
624-
; GFX9-NEXT: s_or_b32 s0, s4, s0
625-
; GFX9-NEXT: v_mov_b32_e32 v0, s0
626-
; GFX9-NEXT: v_mov_b32_e32 v1, s1
627-
; GFX9-NEXT: v_mov_b32_e32 v2, s2
628-
; GFX9-NEXT: v_mov_b32_e32 v3, s3
629573
; GFX9-NEXT: v_mov_b32_e32 v5, s9
574+
; GFX9-NEXT: v_mov_b32_e32 v0, s3
575+
; GFX9-NEXT: v_mov_b32_e32 v1, s2
576+
; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, 31
577+
; GFX9-NEXT: v_mov_b32_e32 v0, s1
578+
; GFX9-NEXT: v_alignbit_b32 v2, s6, v1, 23
579+
; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 25
580+
; GFX9-NEXT: v_mov_b32_e32 v0, s0
581+
; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 31
630582
; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off
631583
; GFX9-NEXT: s_endpgm
632584
;
633585
; R600-LABEL: fshl_v4i32_imm:
634586
; R600: ; %bb.0: ; %entry
635-
; R600-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[]
587+
; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
636588
; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
637589
; R600-NEXT: CF_END
638590
; R600-NEXT: PAD
639591
; R600-NEXT: ALU clause starting at 4:
640-
; R600-NEXT: LSHL T0.W, KC0[4].X, 1,
641-
; R600-NEXT: LSHR * T1.W, KC0[5].X, literal.x,
592+
; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, literal.x,
642593
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
643-
; R600-NEXT: LSHL T0.Z, KC0[3].W, literal.x,
644-
; R600-NEXT: LSHR T2.W, KC0[4].W, literal.y,
645-
; R600-NEXT: OR_INT * T0.W, PV.W, PS,
646-
; R600-NEXT: 9(1.261169e-44), 23(3.222986e-44)
647-
; R600-NEXT: OR_INT T0.Z, PV.Z, PV.W,
648-
; R600-NEXT: LSHL T1.W, KC0[3].Z, literal.x,
649-
; R600-NEXT: LSHR * T2.W, KC0[4].Z, literal.y,
650-
; R600-NEXT: 7(9.809089e-45), 25(3.503246e-44)
651-
; R600-NEXT: OR_INT T0.Y, PV.W, PS,
652-
; R600-NEXT: LSHL T1.W, KC0[3].Y, 1,
653-
; R600-NEXT: LSHR * T2.W, KC0[4].Y, literal.x,
594+
; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x,
595+
; R600-NEXT: 23(3.222986e-44), 0(0.000000e+00)
596+
; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x,
597+
; R600-NEXT: 25(3.503246e-44), 0(0.000000e+00)
598+
; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, literal.x,
654599
; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00)
655-
; R600-NEXT: OR_INT T0.X, PV.W, PS,
656600
; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
657601
; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00)
658602
entry:

0 commit comments

Comments
 (0)