@@ -4526,33 +4526,33 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
4526
4526
; GFX6-NEXT: s_mov_b32 s0, s4
4527
4527
; GFX6-NEXT: s_mov_b32 s1, s5
4528
4528
; GFX6-NEXT: s_waitcnt vmcnt(0)
4529
- ; GFX6-NEXT: v_bfe_u32 v2, v29, 11, 1
4530
- ; GFX6-NEXT: v_bfe_u32 v0, v29, 10 , 1
4531
- ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80
4532
- ; GFX6-NEXT: v_bfe_u32 v5, v29, 9 , 1
4529
+ ; GFX6-NEXT: v_lshrrev_b32_e32 v2, 15, v29
4530
+ ; GFX6-NEXT: v_bfe_u32 v0, v29, 14 , 1
4531
+ ; GFX6-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112
4532
+ ; GFX6-NEXT: v_bfe_u32 v5, v29, 13 , 1
4533
4533
; GFX6-NEXT: s_waitcnt expcnt(0)
4534
- ; GFX6-NEXT: v_bfe_u32 v3, v29, 8 , 1
4535
- ; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:64
4536
- ; GFX6-NEXT: v_lshrrev_b32_e32 v8, 15, v29
4534
+ ; GFX6-NEXT: v_bfe_u32 v3, v29, 12 , 1
4535
+ ; GFX6-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:96
4536
+ ; GFX6-NEXT: v_bfe_u32 v8, v29, 11, 1
4537
4537
; GFX6-NEXT: s_waitcnt expcnt(0)
4538
- ; GFX6-NEXT: v_bfe_u32 v6, v29, 14 , 1
4539
- ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:112
4540
- ; GFX6-NEXT: v_bfe_u32 v27, v29, 5 , 1
4541
- ; GFX6-NEXT: v_bfe_u32 v23, v29, 7 , 1
4542
- ; GFX6-NEXT: v_bfe_u32 v19, v29, 1 , 1
4543
- ; GFX6-NEXT: v_bfe_u32 v15, v29, 3 , 1
4544
- ; GFX6-NEXT: v_bfe_u32 v11, v29, 13 , 1
4545
- ; GFX6-NEXT: v_bfe_u32 v25, v29, 4, 1
4546
- ; GFX6-NEXT: v_bfe_u32 v21, v29, 6 , 1
4547
- ; GFX6-NEXT: v_and_b32_e32 v17, 1, v29
4548
- ; GFX6-NEXT: v_bfe_u32 v13, v29, 2 , 1
4538
+ ; GFX6-NEXT: v_bfe_u32 v6, v29, 10 , 1
4539
+ ; GFX6-NEXT: buffer_store_dwordx4 v[6:9], off, s[0:3], 0 offset:80
4540
+ ; GFX6-NEXT: v_bfe_u32 v27, v29, 1 , 1
4541
+ ; GFX6-NEXT: v_bfe_u32 v23, v29, 3 , 1
4542
+ ; GFX6-NEXT: v_bfe_u32 v19, v29, 5 , 1
4543
+ ; GFX6-NEXT: v_bfe_u32 v15, v29, 7 , 1
4544
+ ; GFX6-NEXT: v_bfe_u32 v11, v29, 9 , 1
4545
+ ; GFX6-NEXT: v_and_b32_e32 v25, 1, v29
4546
+ ; GFX6-NEXT: v_bfe_u32 v21, v29, 2 , 1
4547
+ ; GFX6-NEXT: v_bfe_u32 v17, v29, 4, 1
4548
+ ; GFX6-NEXT: v_bfe_u32 v13, v29, 6 , 1
4549
4549
; GFX6-NEXT: s_waitcnt expcnt(0)
4550
- ; GFX6-NEXT: v_bfe_u32 v9, v29, 12 , 1
4551
- ; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:96
4552
- ; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:16
4553
- ; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0
4554
- ; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:48
4555
- ; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0 offset:32
4550
+ ; GFX6-NEXT: v_bfe_u32 v9, v29, 8 , 1
4551
+ ; GFX6-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64
4552
+ ; GFX6-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:48
4553
+ ; GFX6-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:32
4554
+ ; GFX6-NEXT: buffer_store_dwordx4 v[21:24], off, s[0:3], 0 offset:16
4555
+ ; GFX6-NEXT: buffer_store_dwordx4 v[25:28], off, s[0:3], 0
4556
4556
; GFX6-NEXT: s_endpgm
4557
4557
;
4558
4558
; GFX8-LABEL: constant_zextload_v16i1_to_v16i64:
@@ -4561,21 +4561,20 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
4561
4561
; GFX8-NEXT: v_mov_b32_e32 v17, 0
4562
4562
; GFX8-NEXT: v_mov_b32_e32 v21, 0
4563
4563
; GFX8-NEXT: v_mov_b32_e32 v19, v17
4564
- ; GFX8-NEXT: v_mov_b32_e32 v13, v17
4564
+ ; GFX8-NEXT: v_mov_b32_e32 v4, 1
4565
4565
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
4566
4566
; GFX8-NEXT: v_mov_b32_e32 v0, s2
4567
4567
; GFX8-NEXT: v_mov_b32_e32 v1, s3
4568
4568
; GFX8-NEXT: flat_load_ushort v2, v[0:1]
4569
4569
; GFX8-NEXT: s_add_u32 s2, s0, 0x70
4570
4570
; GFX8-NEXT: s_addc_u32 s3, s1, 0
4571
- ; GFX8-NEXT: s_add_u32 s4, s0, 0x50
4571
+ ; GFX8-NEXT: s_add_u32 s4, s0, 0x60
4572
4572
; GFX8-NEXT: s_addc_u32 s5, s1, 0
4573
- ; GFX8-NEXT: v_mov_b32_e32 v0, s4
4574
4573
; GFX8-NEXT: v_mov_b32_e32 v24, s3
4575
- ; GFX8-NEXT: v_mov_b32_e32 v1, s5
4576
4574
; GFX8-NEXT: v_mov_b32_e32 v23, s2
4577
- ; GFX8-NEXT: s_add_u32 s2, s0, 64
4575
+ ; GFX8-NEXT: s_add_u32 s2, s0, 0x50
4578
4576
; GFX8-NEXT: s_addc_u32 s3, s1, 0
4577
+ ; GFX8-NEXT: v_mov_b32_e32 v13, v17
4579
4578
; GFX8-NEXT: v_mov_b32_e32 v9, v17
4580
4579
; GFX8-NEXT: v_mov_b32_e32 v5, v17
4581
4580
; GFX8-NEXT: v_mov_b32_e32 v22, 0
@@ -4584,24 +4583,26 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
4584
4583
; GFX8-NEXT: v_mov_b32_e32 v7, 0
4585
4584
; GFX8-NEXT: v_mov_b32_e32 v11, 0
4586
4585
; GFX8-NEXT: s_waitcnt vmcnt(0)
4587
- ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 10, v2
4588
- ; GFX8-NEXT: v_and_b32_e32 v18, 1, v4
4589
- ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 11, v2
4590
- ; GFX8-NEXT: v_and_b32_e32 v4, 1, v4
4591
- ; GFX8-NEXT: v_and_b32_e32 v20, 0xffff, v4
4592
- ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 14, v2
4586
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 13, v2
4587
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v2
4588
+ ; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
4589
+ ; GFX8-NEXT: v_and_b32_e32 v18, 1, v1
4590
+ ; GFX8-NEXT: v_and_b32_e32 v20, 0xffff, v0
4591
+ ; GFX8-NEXT: v_mov_b32_e32 v0, s4
4592
+ ; GFX8-NEXT: v_mov_b32_e32 v1, s5
4593
4593
; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[18:21]
4594
- ; GFX8-NEXT: v_mov_b32_e32 v0, 1
4595
- ; GFX8-NEXT: v_and_b32_e32 v16, 1, v4
4594
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 14, v2
4595
+ ; GFX8-NEXT: v_and_b32_e32 v16, 1, v0
4596
4596
; GFX8-NEXT: v_lshrrev_b16_e32 v18, 15, v2
4597
4597
; GFX8-NEXT: flat_store_dwordx4 v[23:24], v[16:19]
4598
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 11, v2
4598
4599
; GFX8-NEXT: v_mov_b32_e32 v24, s3
4599
- ; GFX8-NEXT: v_and_b32_sdwa v16, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
4600
- ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 9, v2
4601
- ; GFX8-NEXT: v_mov_b32_e32 v23, s2
4600
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v6, 10, v2
4602
4601
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
4603
- ; GFX8-NEXT: s_add_u32 s2, s0, 0x60
4602
+ ; GFX8-NEXT: v_mov_b32_e32 v23, s2
4603
+ ; GFX8-NEXT: s_add_u32 s2, s0, 64
4604
4604
; GFX8-NEXT: v_mov_b32_e32 v19, 0
4605
+ ; GFX8-NEXT: v_and_b32_e32 v16, 1, v6
4605
4606
; GFX8-NEXT: v_and_b32_e32 v18, 0xffff, v0
4606
4607
; GFX8-NEXT: s_addc_u32 s3, s1, 0
4607
4608
; GFX8-NEXT: flat_store_dwordx4 v[23:24], v[16:19]
@@ -4610,17 +4611,16 @@ define amdgpu_kernel void @constant_zextload_v16i1_to_v16i64(ptr addrspace(1) %o
4610
4611
; GFX8-NEXT: s_add_u32 s2, s0, 48
4611
4612
; GFX8-NEXT: s_addc_u32 s3, s1, 0
4612
4613
; GFX8-NEXT: v_mov_b32_e32 v26, s3
4613
- ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 12, v2
4614
4614
; GFX8-NEXT: v_mov_b32_e32 v25, s2
4615
4615
; GFX8-NEXT: s_add_u32 s2, s0, 32
4616
- ; GFX8-NEXT: v_and_b32_e32 v19, 1, v0
4617
- ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 13, v2
4616
+ ; GFX8-NEXT: v_lshrrev_b16_e32 v0, 9, v2
4618
4617
; GFX8-NEXT: v_mov_b32_e32 v20, v17
4619
4618
; GFX8-NEXT: v_mov_b32_e32 v1, v17
4620
4619
; GFX8-NEXT: v_mov_b32_e32 v17, s1
4621
4620
; GFX8-NEXT: s_addc_u32 s3, s1, 0
4622
4621
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
4623
4622
; GFX8-NEXT: v_mov_b32_e32 v16, s0
4623
+ ; GFX8-NEXT: v_and_b32_sdwa v19, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD
4624
4624
; GFX8-NEXT: v_and_b32_e32 v21, 0xffff, v0
4625
4625
; GFX8-NEXT: s_add_u32 s0, s0, 16
4626
4626
; GFX8-NEXT: v_lshrrev_b16_e32 v6, 7, v2
0 commit comments