Skip to content

Commit 45fd7c0

Browse files
committed
Revert "[AMDGPU] Mark additional VOP3 as commutable"
This reverts commit d35d8da.
1 parent 221388f commit 45fd7c0

File tree

8 files changed

+67
-143
lines changed

8 files changed

+67
-143
lines changed

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -364,25 +364,22 @@ defm V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGP
364364
defm V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, fshr>;
365365
defm V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_alignbyte>;
366366

367-
// XXX - No FPException seems suspect but manual doesn't say it does
368-
let mayRaiseFPException = 0, isCommutable = 1 in {
369-
defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
370-
defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
371-
defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
372-
defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
373-
defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
374-
defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
375-
defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
376-
defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
377-
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
378-
} // End mayRaiseFPException = 0, isCommutable = 1
367+
let mayRaiseFPException = 0 in { // XXX - Seems suspect but manual doesn't say it does
368+
defm V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
369+
defm V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
370+
defm V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
371+
defm V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
372+
defm V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
373+
defm V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
374+
defm V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
375+
defm V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
376+
defm V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
377+
} // End mayRaiseFPException = 0
379378

380-
let isCommutable = 1 in {
381-
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
382-
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
383-
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
384-
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
385-
} // End isCommutable = 1
379+
defm V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
380+
defm V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
381+
defm V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
382+
defm V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_CLAMP>>;
386383
defm V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
387384

388385
defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
@@ -622,36 +619,41 @@ class ThreeOpFrag<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
622619
}
623620

624621
let SubtargetPredicate = isGFX9Plus in {
625-
let isCommutable = 1 in {
626-
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
627-
defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
628-
defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
629-
defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
630-
defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
631-
defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
632-
defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
633-
defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
634-
defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
635-
defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
636-
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
637-
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
638-
defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
639-
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
640-
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
641-
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
642-
defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
643-
defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
644-
} // End isCommutable = 1
645-
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
646-
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
647622
defm V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
648623
defm V_LSHL_ADD_U32 : VOP3Inst <"v_lshl_add_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
624+
defm V_ADD_LSHL_U32 : VOP3Inst <"v_add_lshl_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
625+
defm V_ADD3_U32 : VOP3Inst <"v_add3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
649626
defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
627+
defm V_AND_OR_B32 : VOP3Inst <"v_and_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
628+
defm V_OR3_B32 : VOP3Inst <"v_or3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
629+
630+
defm V_XAD_U32 : VOP3Inst <"v_xad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
650631

632+
defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
633+
defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
634+
defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
635+
636+
defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
637+
defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
638+
defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
639+
640+
defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
641+
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
642+
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
643+
644+
defm V_ADD_I16 : VOP3Inst <"v_add_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
645+
defm V_SUB_I16 : VOP3Inst <"v_sub_i16", VOP3_Profile<VOP_I16_I16_I16, VOP3_OPSEL>>;
646+
647+
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
648+
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>;
651649

652650
defm V_CVT_PKNORM_I16_F16 : VOP3Inst <"v_cvt_pknorm_i16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
653651
defm V_CVT_PKNORM_U16_F16 : VOP3Inst <"v_cvt_pknorm_u16_f16", VOP3_Profile<VOP_B32_F16_F16, VOP3_OPSEL>>;
654652

653+
defm V_ADD_I32 : VOP3Inst <"v_add_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
654+
defm V_SUB_I32 : VOP3Inst <"v_sub_i32", VOP3_Profile<VOP_I32_I32_I32_ARITH>>;
655+
656+
655657
class ThreeOp_i32_Pats <SDPatternOperator op1, SDPatternOperator op2, Instruction inst> : GCNPat <
656658
// This matches (op2 (op1 i32:$src0, i32:$src1), i32:$src2) with conditions.
657659
(ThreeOpFrag<op1, op2> i32:$src0, i32:$src1, i32:$src2),
@@ -728,9 +730,7 @@ class PermlaneDiscardVDstIn<SDPatternOperator permlane,
728730

729731

730732
let SubtargetPredicate = isGFX10Plus in {
731-
let isCommutable = 1 in {
732-
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
733-
} // End isCommutable = 1
733+
defm V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
734734
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32_e64>;
735735

736736
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {

llvm/test/CodeGen/AMDGPU/GlobalISel/add_shl.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
112112
;
113113
; GFX10-LABEL: add_shl_vgpr_const_inline_const:
114114
; GFX10: ; %bb.0:
115-
; GFX10-NEXT: v_add_lshl_u32 v0, 0x3f4, v0, 9
115+
; GFX10-NEXT: v_add_lshl_u32 v0, v0, 0x3f4, 9
116116
; GFX10-NEXT: ; return to shader part epilog
117117
%x = add i32 %a, 1012
118118
%result = shl i32 %x, 9

llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3555,7 +3555,7 @@ define <2 x i16> @v_fshl_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
35553555
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v3
35563556
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
35573557
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
3558-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v3, v2
3558+
; GFX10-NEXT: v_and_or_b32 v2, v3, 0xffff, v2
35593559
; GFX10-NEXT: v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1]
35603560
; GFX10-NEXT: v_pk_lshlrev_b16 v0, v2, v0
35613561
; GFX10-NEXT: v_pk_lshrrev_b16 v1, v3, v1

llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3455,7 +3455,7 @@ define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) {
34553455
; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v3
34563456
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 16, v2
34573457
; GFX10-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc_lo
3458-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v3, v2
3458+
; GFX10-NEXT: v_and_or_b32 v2, v3, 0xffff, v2
34593459
; GFX10-NEXT: v_pk_sub_i16 v3, 16, v2 op_sel_hi:[0,1]
34603460
; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1
34613461
; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ define amdgpu_ps float @atomic_add_i32_2d(<8 x i32> inreg %rsrc, i32 %data, i16
469469
; GFX10-NEXT: s_mov_b32 s1, s3
470470
; GFX10-NEXT: s_mov_b32 s2, s4
471471
; GFX10-NEXT: s_mov_b32 s3, s5
472-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
472+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
473473
; GFX10-NEXT: s_mov_b32 s4, s6
474474
; GFX10-NEXT: s_mov_b32 s5, s7
475475
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -596,7 +596,7 @@ define amdgpu_ps float @atomic_add_i32_1darray(<8 x i32> inreg %rsrc, i32 %data,
596596
; GFX10-NEXT: s_mov_b32 s1, s3
597597
; GFX10-NEXT: s_mov_b32 s2, s4
598598
; GFX10-NEXT: s_mov_b32 s3, s5
599-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
599+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
600600
; GFX10-NEXT: s_mov_b32 s4, s6
601601
; GFX10-NEXT: s_mov_b32 s5, s7
602602
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -1243,7 +1243,7 @@ define amdgpu_ps <2 x float> @atomic_add_i64_2d(<8 x i32> inreg %rsrc, i64 %data
12431243
; GFX10-NEXT: s_mov_b32 s1, s3
12441244
; GFX10-NEXT: s_mov_b32 s2, s4
12451245
; GFX10-NEXT: s_mov_b32 s3, s5
1246-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
1246+
; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
12471247
; GFX10-NEXT: s_mov_b32 s4, s6
12481248
; GFX10-NEXT: s_mov_b32 s5, s7
12491249
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -1370,7 +1370,7 @@ define amdgpu_ps <2 x float> @atomic_add_i64_1darray(<8 x i32> inreg %rsrc, i64
13701370
; GFX10-NEXT: s_mov_b32 s1, s3
13711371
; GFX10-NEXT: s_mov_b32 s2, s4
13721372
; GFX10-NEXT: s_mov_b32 s3, s5
1373-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
1373+
; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
13741374
; GFX10-NEXT: s_mov_b32 s4, s6
13751375
; GFX10-NEXT: s_mov_b32 s5, s7
13761376
; GFX10-NEXT: s_mov_b32 s6, s8

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
4444
; GFX10NSA-NEXT: s_mov_b32 s9, s11
4545
; GFX10NSA-NEXT: s_mov_b32 s10, s12
4646
; GFX10NSA-NEXT: s_mov_b32 s11, s13
47-
; GFX10NSA-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
47+
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
4848
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
4949
; GFX10NSA-NEXT: image_gather4 v[0:3], v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
5050
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
@@ -210,7 +210,7 @@ define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
210210
; GFX10NSA-NEXT: s_mov_b32 s9, s11
211211
; GFX10NSA-NEXT: s_mov_b32 s10, s12
212212
; GFX10NSA-NEXT: s_mov_b32 s11, s13
213-
; GFX10NSA-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
213+
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
214214
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
215215
; GFX10NSA-NEXT: image_gather4_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
216216
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
@@ -376,7 +376,7 @@ define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
376376
; GFX10NSA-NEXT: s_mov_b32 s9, s11
377377
; GFX10NSA-NEXT: s_mov_b32 s10, s12
378378
; GFX10NSA-NEXT: s_mov_b32 s11, s13
379-
; GFX10NSA-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
379+
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
380380
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
381381
; GFX10NSA-NEXT: image_gather4_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
382382
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
@@ -428,7 +428,7 @@ define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> in
428428
; GFX10NSA-NEXT: s_mov_b32 s9, s11
429429
; GFX10NSA-NEXT: s_mov_b32 s10, s12
430430
; GFX10NSA-NEXT: s_mov_b32 s11, s13
431-
; GFX10NSA-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
431+
; GFX10NSA-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
432432
; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s28
433433
; GFX10NSA-NEXT: image_gather4_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
434434
; GFX10NSA-NEXT: s_waitcnt vmcnt(0)
@@ -683,7 +683,7 @@ define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
683683
; GFX10NSA-NEXT: s_mov_b32 s1, s3
684684
; GFX10NSA-NEXT: s_mov_b32 s2, s4
685685
; GFX10NSA-NEXT: s_mov_b32 s3, s5
686-
; GFX10NSA-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
686+
; GFX10NSA-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
687687
; GFX10NSA-NEXT: s_mov_b32 s4, s6
688688
; GFX10NSA-NEXT: s_mov_b32 s5, s7
689689
; GFX10NSA-NEXT: s_mov_b32 s6, s8
@@ -729,7 +729,7 @@ define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> i
729729
; GFX10NSA-NEXT: s_mov_b32 s1, s3
730730
; GFX10NSA-NEXT: s_mov_b32 s2, s4
731731
; GFX10NSA-NEXT: s_mov_b32 s3, s5
732-
; GFX10NSA-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
732+
; GFX10NSA-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
733733
; GFX10NSA-NEXT: s_mov_b32 s4, s6
734734
; GFX10NSA-NEXT: s_mov_b32 s5, s7
735735
; GFX10NSA-NEXT: s_mov_b32 s6, s8

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg
7272
; GFX10-NEXT: s_mov_b32 s1, s3
7373
; GFX10-NEXT: s_mov_b32 s2, s4
7474
; GFX10-NEXT: s_mov_b32 s3, s5
75-
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
75+
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
7676
; GFX10-NEXT: s_mov_b32 s4, s6
7777
; GFX10-NEXT: s_mov_b32 s5, s7
7878
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -121,7 +121,7 @@ define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
121121
; GFX10-NEXT: s_mov_b32 s10, s12
122122
; GFX10-NEXT: s_lshl_b32 s12, s0, 16
123123
; GFX10-NEXT: s_mov_b32 s1, s3
124-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s12
124+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, s12
125125
; GFX10-NEXT: s_mov_b32 s3, s5
126126
; GFX10-NEXT: s_mov_b32 s5, s7
127127
; GFX10-NEXT: s_mov_b32 s7, s9
@@ -164,7 +164,7 @@ define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
164164
; GFX10-NEXT: s_mov_b32 s1, s3
165165
; GFX10-NEXT: s_mov_b32 s2, s4
166166
; GFX10-NEXT: s_mov_b32 s3, s5
167-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
167+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
168168
; GFX10-NEXT: s_mov_b32 s4, s6
169169
; GFX10-NEXT: s_mov_b32 s5, s7
170170
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -213,7 +213,7 @@ define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inr
213213
; GFX10-NEXT: s_mov_b32 s10, s12
214214
; GFX10-NEXT: s_lshl_b32 s12, s0, 16
215215
; GFX10-NEXT: s_mov_b32 s1, s3
216-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, s12
216+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, s12
217217
; GFX10-NEXT: s_mov_b32 s3, s5
218218
; GFX10-NEXT: s_mov_b32 s5, s7
219219
; GFX10-NEXT: s_mov_b32 s7, s9
@@ -256,7 +256,7 @@ define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inr
256256
; GFX10-NEXT: s_mov_b32 s1, s3
257257
; GFX10-NEXT: s_mov_b32 s2, s4
258258
; GFX10-NEXT: s_mov_b32 s3, s5
259-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
259+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
260260
; GFX10-NEXT: s_mov_b32 s4, s6
261261
; GFX10-NEXT: s_mov_b32 s5, s7
262262
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -305,7 +305,7 @@ define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> i
305305
; GFX10-NEXT: s_mov_b32 s10, s12
306306
; GFX10-NEXT: s_lshl_b32 s12, s0, 16
307307
; GFX10-NEXT: s_mov_b32 s1, s3
308-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, s12
308+
; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, s12
309309
; GFX10-NEXT: s_mov_b32 s3, s5
310310
; GFX10-NEXT: s_mov_b32 s5, s7
311311
; GFX10-NEXT: s_mov_b32 s7, s9
@@ -348,7 +348,7 @@ define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> i
348348
; GFX10-NEXT: s_mov_b32 s1, s3
349349
; GFX10-NEXT: s_mov_b32 s2, s4
350350
; GFX10-NEXT: s_mov_b32 s3, s5
351-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
351+
; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
352352
; GFX10-NEXT: s_mov_b32 s4, s6
353353
; GFX10-NEXT: s_mov_b32 s5, s7
354354
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -394,7 +394,7 @@ define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inre
394394
; GFX10-NEXT: s_mov_b32 s1, s3
395395
; GFX10-NEXT: s_mov_b32 s2, s4
396396
; GFX10-NEXT: s_mov_b32 s3, s5
397-
; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1
397+
; GFX10-NEXT: v_and_or_b32 v0, v0, 0xffff, v1
398398
; GFX10-NEXT: s_mov_b32 s4, s6
399399
; GFX10-NEXT: s_mov_b32 s5, s7
400400
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -440,7 +440,7 @@ define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> in
440440
; GFX10-NEXT: s_mov_b32 s1, s3
441441
; GFX10-NEXT: s_mov_b32 s2, s4
442442
; GFX10-NEXT: s_mov_b32 s3, s5
443-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
443+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
444444
; GFX10-NEXT: s_mov_b32 s4, s6
445445
; GFX10-NEXT: s_mov_b32 s5, s7
446446
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -486,7 +486,7 @@ define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> in
486486
; GFX10-NEXT: s_mov_b32 s1, s3
487487
; GFX10-NEXT: s_mov_b32 s2, s4
488488
; GFX10-NEXT: s_mov_b32 s3, s5
489-
; GFX10-NEXT: v_and_or_b32 v1, 0xffff, v1, v2
489+
; GFX10-NEXT: v_and_or_b32 v1, v1, 0xffff, v2
490490
; GFX10-NEXT: s_mov_b32 s4, s6
491491
; GFX10-NEXT: s_mov_b32 s5, s7
492492
; GFX10-NEXT: s_mov_b32 s6, s8
@@ -532,7 +532,7 @@ define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32>
532532
; GFX10-NEXT: s_mov_b32 s1, s3
533533
; GFX10-NEXT: s_mov_b32 s2, s4
534534
; GFX10-NEXT: s_mov_b32 s3, s5
535-
; GFX10-NEXT: v_and_or_b32 v2, 0xffff, v2, v3
535+
; GFX10-NEXT: v_and_or_b32 v2, v2, 0xffff, v3
536536
; GFX10-NEXT: s_mov_b32 s4, s6
537537
; GFX10-NEXT: s_mov_b32 s5, s7
538538
; GFX10-NEXT: s_mov_b32 s6, s8

0 commit comments

Comments
 (0)