@@ -60,13 +60,21 @@ define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
60
60
; GFX10-NEXT: global_store_short v[2:3], v0, off
61
61
; GFX10-NEXT: s_setpc_b64 s[30:31]
62
62
;
63
- ; GFX11-LABEL: test_load_store:
64
- ; GFX11: ; %bb.0:
65
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66
- ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
67
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
68
- ; GFX11-NEXT: global_store_b16 v[2:3], v0, off
69
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
63
+ ; GFX11TRUE16-LABEL: test_load_store:
64
+ ; GFX11TRUE16: ; %bb.0:
65
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66
+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
67
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
68
+ ; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off
69
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
70
+ ;
71
+ ; GFX11FAKE16-LABEL: test_load_store:
72
+ ; GFX11FAKE16: ; %bb.0:
73
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74
+ ; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off
75
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
76
+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off
77
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
70
78
%val = load bfloat, ptr addrspace(1) %in
71
79
store bfloat %val, ptr addrspace(1) %out
72
80
ret void
@@ -2127,14 +2135,23 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
2127
2135
; GFX10-NEXT: global_store_short v[2:3], v5, off
2128
2136
; GFX10-NEXT: s_setpc_b64 s[30:31]
2129
2137
;
2130
- ; GFX11-LABEL: test_store_fpimm:
2131
- ; GFX11: ; %bb.0:
2132
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2133
- ; GFX11-NEXT: v_mov_b32_e32 v4, 0x3f80
2134
- ; GFX11-NEXT: v_mov_b32_e32 v5, 0x4228
2135
- ; GFX11-NEXT: global_store_b16 v[0:1], v4, off
2136
- ; GFX11-NEXT: global_store_b16 v[2:3], v5, off
2137
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
2138
+ ; GFX11TRUE16-LABEL: test_store_fpimm:
2139
+ ; GFX11TRUE16: ; %bb.0:
2140
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2141
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
2142
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
2143
+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v4, off
2144
+ ; GFX11TRUE16-NEXT: global_store_d16_hi_b16 v[2:3], v4, off
2145
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
2146
+ ;
2147
+ ; GFX11FAKE16-LABEL: test_store_fpimm:
2148
+ ; GFX11FAKE16: ; %bb.0:
2149
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150
+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
2151
+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
2152
+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v4, off
2153
+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
2154
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
2138
2155
store bfloat 1.0, ptr addrspace(1) %ptr0
2139
2156
store bfloat 42.0, ptr addrspace(1) %ptr1
2140
2157
ret void
@@ -3330,12 +3347,19 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
3330
3347
; GFX10-NEXT: global_store_short v[0:1], v2, off
3331
3348
; GFX10-NEXT: s_setpc_b64 s[30:31]
3332
3349
;
3333
- ; GFX11-LABEL: test_inreg_arg_store:
3334
- ; GFX11: ; %bb.0:
3335
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3336
- ; GFX11-NEXT: v_mov_b32_e32 v2, s4
3337
- ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
3338
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3350
+ ; GFX11TRUE16-LABEL: test_inreg_arg_store:
3351
+ ; GFX11TRUE16: ; %bb.0:
3352
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3353
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v2.l, s4
3354
+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v2, off
3355
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3356
+ ;
3357
+ ; GFX11FAKE16-LABEL: test_inreg_arg_store:
3358
+ ; GFX11FAKE16: ; %bb.0:
3359
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3360
+ ; GFX11FAKE16-NEXT: v_mov_b32_e32 v2, s4
3361
+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
3362
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
3339
3363
store bfloat %in, ptr addrspace(1) %out
3340
3364
ret void
3341
3365
}
@@ -3379,11 +3403,18 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
3379
3403
; GFX10-NEXT: buffer_store_short v0, off, s[0:3], s32
3380
3404
; GFX10-NEXT: s_setpc_b64 s[30:31]
3381
3405
;
3382
- ; GFX11-LABEL: test_byval:
3383
- ; GFX11: ; %bb.0:
3384
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3385
- ; GFX11-NEXT: scratch_store_b16 off, v0, s32
3386
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3406
+ ; GFX11TRUE16-LABEL: test_byval:
3407
+ ; GFX11TRUE16: ; %bb.0:
3408
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3409
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
3410
+ ; GFX11TRUE16-NEXT: scratch_store_b16 off, v1, s32
3411
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3412
+ ;
3413
+ ; GFX11FAKE16-LABEL: test_byval:
3414
+ ; GFX11FAKE16: ; %bb.0:
3415
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3416
+ ; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32
3417
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
3387
3418
store bfloat %val, ptr addrspace(5) %bv
3388
3419
%retval = load bfloat, ptr addrspace(5) %bv
3389
3420
ret bfloat %retval
@@ -3490,13 +3521,21 @@ define void @test_bitcast_from_bfloat(ptr addrspace(1) %in, ptr addrspace(1) %ou
3490
3521
; GFX10-NEXT: global_store_short v[2:3], v0, off
3491
3522
; GFX10-NEXT: s_setpc_b64 s[30:31]
3492
3523
;
3493
- ; GFX11-LABEL: test_bitcast_from_bfloat:
3494
- ; GFX11: ; %bb.0:
3495
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3496
- ; GFX11-NEXT: global_load_u16 v0, v[0:1], off
3497
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3498
- ; GFX11-NEXT: global_store_b16 v[2:3], v0, off
3499
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3524
+ ; GFX11TRUE16-LABEL: test_bitcast_from_bfloat:
3525
+ ; GFX11TRUE16: ; %bb.0:
3526
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3527
+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v0, v[0:1], off
3528
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
3529
+ ; GFX11TRUE16-NEXT: global_store_b16 v[2:3], v0, off
3530
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3531
+ ;
3532
+ ; GFX11FAKE16-LABEL: test_bitcast_from_bfloat:
3533
+ ; GFX11FAKE16: ; %bb.0:
3534
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3535
+ ; GFX11FAKE16-NEXT: global_load_u16 v0, v[0:1], off
3536
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
3537
+ ; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v0, off
3538
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
3500
3539
%val = load bfloat, ptr addrspace(1) %in
3501
3540
%val_int = bitcast bfloat %val to i16
3502
3541
store i16 %val_int, ptr addrspace(1) %out
@@ -3556,13 +3595,21 @@ define void @test_bitcast_to_bfloat(ptr addrspace(1) %out, ptr addrspace(1) %in)
3556
3595
; GFX10-NEXT: global_store_short v[0:1], v2, off
3557
3596
; GFX10-NEXT: s_setpc_b64 s[30:31]
3558
3597
;
3559
- ; GFX11-LABEL: test_bitcast_to_bfloat:
3560
- ; GFX11: ; %bb.0:
3561
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3562
- ; GFX11-NEXT: global_load_u16 v2, v[2:3], off
3563
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
3564
- ; GFX11-NEXT: global_store_b16 v[0:1], v2, off
3565
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
3598
+ ; GFX11TRUE16-LABEL: test_bitcast_to_bfloat:
3599
+ ; GFX11TRUE16: ; %bb.0:
3600
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3601
+ ; GFX11TRUE16-NEXT: global_load_d16_b16 v2, v[2:3], off
3602
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
3603
+ ; GFX11TRUE16-NEXT: global_store_b16 v[0:1], v2, off
3604
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
3605
+ ;
3606
+ ; GFX11FAKE16-LABEL: test_bitcast_to_bfloat:
3607
+ ; GFX11FAKE16: ; %bb.0:
3608
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3609
+ ; GFX11FAKE16-NEXT: global_load_u16 v2, v[2:3], off
3610
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
3611
+ ; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
3612
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
3566
3613
%val = load i16, ptr addrspace(1) %in
3567
3614
%val_fp = bitcast i16 %val to bfloat
3568
3615
store bfloat %val_fp, ptr addrspace(1) %out
@@ -5309,14 +5356,23 @@ define bfloat @test_alloca_load_store_ret(bfloat %in) {
5309
5356
; GFX10-NEXT: s_waitcnt vmcnt(0)
5310
5357
; GFX10-NEXT: s_setpc_b64 s[30:31]
5311
5358
;
5312
- ; GFX11-LABEL: test_alloca_load_store_ret:
5313
- ; GFX11: ; %bb.0: ; %entry
5314
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5315
- ; GFX11-NEXT: scratch_store_b16 off, v0, s32 dlc
5316
- ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
5317
- ; GFX11-NEXT: scratch_load_u16 v0, off, s32 glc dlc
5318
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
5319
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
5359
+ ; GFX11TRUE16-LABEL: test_alloca_load_store_ret:
5360
+ ; GFX11TRUE16: ; %bb.0: ; %entry
5361
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5362
+ ; GFX11TRUE16-NEXT: scratch_store_b16 off, v0, s32 dlc
5363
+ ; GFX11TRUE16-NEXT: s_waitcnt_vscnt null, 0x0
5364
+ ; GFX11TRUE16-NEXT: scratch_load_d16_b16 v0, off, s32 glc dlc
5365
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
5366
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
5367
+ ;
5368
+ ; GFX11FAKE16-LABEL: test_alloca_load_store_ret:
5369
+ ; GFX11FAKE16: ; %bb.0: ; %entry
5370
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5371
+ ; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32 dlc
5372
+ ; GFX11FAKE16-NEXT: s_waitcnt_vscnt null, 0x0
5373
+ ; GFX11FAKE16-NEXT: scratch_load_u16 v0, off, s32 glc dlc
5374
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
5375
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
5320
5376
entry:
5321
5377
%in.addr = alloca bfloat, align 2, addrspace(5)
5322
5378
store volatile bfloat %in, ptr addrspace(5) %in.addr, align 2
@@ -5667,26 +5723,48 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
5667
5723
; GFX10-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen offset:128
5668
5724
; GFX10-NEXT: s_setpc_b64 s[30:31]
5669
5725
;
5670
- ; GFX11-LABEL: test_overflow_stack:
5671
- ; GFX11: ; %bb.0:
5672
- ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5673
- ; GFX11-NEXT: s_clause 0x2
5674
- ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8
5675
- ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4
5676
- ; GFX11-NEXT: scratch_load_b32 v31, off, s32
5677
- ; GFX11-NEXT: s_clause 0x5
5678
- ; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5679
- ; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5680
- ; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5681
- ; GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5682
- ; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5683
- ; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off
5684
- ; GFX11-NEXT: s_waitcnt vmcnt(0)
5685
- ; GFX11-NEXT: s_clause 0x2
5686
- ; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5687
- ; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5688
- ; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128
5689
- ; GFX11-NEXT: s_setpc_b64 s[30:31]
5726
+ ; GFX11TRUE16-LABEL: test_overflow_stack:
5727
+ ; GFX11TRUE16: ; %bb.0:
5728
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5729
+ ; GFX11TRUE16-NEXT: s_clause 0x2
5730
+ ; GFX11TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
5731
+ ; GFX11TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
5732
+ ; GFX11TRUE16-NEXT: scratch_load_b32 v31, off, s32
5733
+ ; GFX11TRUE16-NEXT: s_clause 0x3
5734
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5735
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5736
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5737
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5738
+ ; GFX11TRUE16-NEXT: s_clause 0x1
5739
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5740
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[2:5], off
5741
+ ; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0)
5742
+ ; GFX11TRUE16-NEXT: s_clause 0x2
5743
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5744
+ ; GFX11TRUE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5745
+ ; GFX11TRUE16-NEXT: scratch_store_b16 v0, v1, off offset:128
5746
+ ; GFX11TRUE16-NEXT: s_setpc_b64 s[30:31]
5747
+ ;
5748
+ ; GFX11FAKE16-LABEL: test_overflow_stack:
5749
+ ; GFX11FAKE16: ; %bb.0:
5750
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5751
+ ; GFX11FAKE16-NEXT: s_clause 0x2
5752
+ ; GFX11FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
5753
+ ; GFX11FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
5754
+ ; GFX11FAKE16-NEXT: scratch_load_b32 v31, off, s32
5755
+ ; GFX11FAKE16-NEXT: s_clause 0x5
5756
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
5757
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
5758
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
5759
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
5760
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
5761
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[2:5], off
5762
+ ; GFX11FAKE16-NEXT: s_waitcnt vmcnt(0)
5763
+ ; GFX11FAKE16-NEXT: s_clause 0x2
5764
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
5765
+ ; GFX11FAKE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
5766
+ ; GFX11FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
5767
+ ; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
5690
5768
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
5691
5769
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
5692
5770
ret { <32 x i32>, bfloat } %ins.1
@@ -42719,7 +42797,7 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
42719
42797
; GFX11TRUE16: ; %bb.0:
42720
42798
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42721
42799
; GFX11TRUE16-NEXT: s_clause 0x1f
42722
- ; GFX11TRUE16-NEXT: scratch_load_u16 v31, off, s32
42800
+ ; GFX11TRUE16-NEXT: scratch_load_d16_b16 v31, off, s32
42723
42801
; GFX11TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:64
42724
42802
; GFX11TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:128
42725
42803
; GFX11TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:60
@@ -42752,16 +42830,17 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
42752
42830
; GFX11TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:72
42753
42831
; GFX11TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:4
42754
42832
; GFX11TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:68
42833
+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
42755
42834
; GFX11TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
42756
42835
; GFX11TRUE16-NEXT: v_and_b32_e32 v14, 1, v14
42757
- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
42758
42836
; GFX11TRUE16-NEXT: v_and_b32_e32 v18, 1, v18
42759
42837
; GFX11TRUE16-NEXT: v_and_b32_e32 v20, 1, v20
42760
42838
; GFX11TRUE16-NEXT: v_and_b32_e32 v22, 1, v22
42761
42839
; GFX11TRUE16-NEXT: v_and_b32_e32 v24, 1, v24
42762
42840
; GFX11TRUE16-NEXT: v_and_b32_e32 v26, 1, v26
42763
42841
; GFX11TRUE16-NEXT: v_and_b32_e32 v28, 1, v28
42764
42842
; GFX11TRUE16-NEXT: v_and_b32_e32 v30, 1, v30
42843
+ ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
42765
42844
; GFX11TRUE16-NEXT: v_and_b32_e32 v1, 1, v1
42766
42845
; GFX11TRUE16-NEXT: v_and_b32_e32 v2, 1, v2
42767
42846
; GFX11TRUE16-NEXT: v_and_b32_e32 v3, 1, v3
@@ -42785,7 +42864,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
42785
42864
; GFX11TRUE16-NEXT: v_and_b32_e32 v29, 1, v29
42786
42865
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
42787
42866
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s13, 1, v14
42788
- ; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s15, 1, v16
42789
42867
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s17, 1, v18
42790
42868
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s19, 1, v20
42791
42869
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s21, 1, v22
@@ -42815,14 +42893,15 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
42815
42893
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s26, 1, v27
42816
42894
; GFX11TRUE16-NEXT: v_cmp_eq_u32_e64 s29, 1, v29
42817
42895
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(32)
42818
- ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v31
42896
+ ; GFX11TRUE16-NEXT: v_mov_b16_e32 v16.l, v31.l
42819
42897
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(31)
42820
42898
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v17, 16, v32
42821
42899
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(30)
42822
42900
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v18, 16, v33
42823
42901
; GFX11TRUE16-NEXT: v_cndmask_b16 v15.l, v33.l, v32.l, s28
42824
42902
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(29)
42825
42903
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v19, 16, v34
42904
+ ; GFX11TRUE16-NEXT: v_and_b32_e32 v16, 1, v16
42826
42905
; GFX11TRUE16-NEXT: s_waitcnt vmcnt(28)
42827
42906
; GFX11TRUE16-NEXT: v_lshrrev_b32_e32 v20, 16, v35
42828
42907
; GFX11TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v34.l, s27
0 commit comments