Skip to content

Commit 9ec97f4

Browse files
committed
[AMDGPU] Fix tests. NFC.
1 parent ee63502 commit 9ec97f4

File tree

2 files changed

+160 -121 lines changed

llvm/test/CodeGen/AMDGPU/chain-hi-to-lo.ll

Lines changed: 60 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -383,39 +383,68 @@ bb:
383383
ret <2 x half> %result
384384
}
385385

386-
define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
387-
; GCN-LABEL: chain_hi_to_lo_flat:
388-
; GCN: ; %bb.0: ; %bb
389-
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390-
; GCN-NEXT: v_mov_b32_e32 v0, 2
391-
; GCN-NEXT: v_mov_b32_e32 v1, 0
392-
; GCN-NEXT: flat_load_ushort v0, v[0:1]
393-
; GCN-NEXT: v_mov_b32_e32 v1, 0
394-
; GCN-NEXT: v_mov_b32_e32 v2, 0
395-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
396-
; GCN-NEXT: flat_load_short_d16_hi v0, v[1:2]
397-
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
398-
; GCN-NEXT: s_setpc_b64 s[30:31]
386+
define <2 x half> @chain_hi_to_lo_flat(ptr inreg %ptr) {
387+
; GFX900-LABEL: chain_hi_to_lo_flat:
388+
; GFX900: ; %bb.0: ; %bb
389+
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390+
; GFX900-NEXT: v_mov_b32_e32 v0, s16
391+
; GFX900-NEXT: v_mov_b32_e32 v1, s17
392+
; GFX900-NEXT: flat_load_ushort v0, v[0:1] offset:2
393+
; GFX900-NEXT: v_mov_b32_e32 v1, 0
394+
; GFX900-NEXT: v_mov_b32_e32 v2, 0
395+
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
396+
; GFX900-NEXT: flat_load_short_d16_hi v0, v[1:2]
397+
; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
398+
; GFX900-NEXT: s_setpc_b64 s[30:31]
399399
;
400-
; GFX10-LABEL: chain_hi_to_lo_flat:
401-
; GFX10: ; %bb.0: ; %bb
402-
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403-
; GFX10-NEXT: v_mov_b32_e32 v0, 2
404-
; GFX10-NEXT: v_mov_b32_e32 v1, 0
405-
; GFX10-NEXT: flat_load_ushort v0, v[0:1]
406-
; GFX10-NEXT: v_mov_b32_e32 v1, 0
407-
; GFX10-NEXT: v_mov_b32_e32 v2, 0
408-
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
409-
; GFX10-NEXT: flat_load_short_d16_hi v0, v[1:2]
410-
; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
411-
; GFX10-NEXT: s_setpc_b64 s[30:31]
400+
; FLATSCR-LABEL: chain_hi_to_lo_flat:
401+
; FLATSCR: ; %bb.0: ; %bb
402+
; FLATSCR-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403+
; FLATSCR-NEXT: v_mov_b32_e32 v0, s0
404+
; FLATSCR-NEXT: v_mov_b32_e32 v1, s1
405+
; FLATSCR-NEXT: flat_load_ushort v0, v[0:1] offset:2
406+
; FLATSCR-NEXT: v_mov_b32_e32 v1, 0
407+
; FLATSCR-NEXT: v_mov_b32_e32 v2, 0
408+
; FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
409+
; FLATSCR-NEXT: flat_load_short_d16_hi v0, v[1:2]
410+
; FLATSCR-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
411+
; FLATSCR-NEXT: s_setpc_b64 s[30:31]
412+
;
413+
; GFX10_DEFAULT-LABEL: chain_hi_to_lo_flat:
414+
; GFX10_DEFAULT: ; %bb.0: ; %bb
415+
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416+
; GFX10_DEFAULT-NEXT: s_add_u32 s4, s16, 2
417+
; GFX10_DEFAULT-NEXT: s_addc_u32 s5, s17, 0
418+
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v0, s4
419+
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v1, s5
420+
; GFX10_DEFAULT-NEXT: flat_load_ushort v0, v[0:1]
421+
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v1, 0
422+
; GFX10_DEFAULT-NEXT: v_mov_b32_e32 v2, 0
423+
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
424+
; GFX10_DEFAULT-NEXT: flat_load_short_d16_hi v0, v[1:2]
425+
; GFX10_DEFAULT-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
426+
; GFX10_DEFAULT-NEXT: s_setpc_b64 s[30:31]
427+
;
428+
; FLATSCR_GFX10-LABEL: chain_hi_to_lo_flat:
429+
; FLATSCR_GFX10: ; %bb.0: ; %bb
430+
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431+
; FLATSCR_GFX10-NEXT: s_add_u32 s0, s0, 2
432+
; FLATSCR_GFX10-NEXT: s_addc_u32 s1, s1, 0
433+
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v0, s0
434+
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v1, s1
435+
; FLATSCR_GFX10-NEXT: flat_load_ushort v0, v[0:1]
436+
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v1, 0
437+
; FLATSCR_GFX10-NEXT: v_mov_b32_e32 v2, 0
438+
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
439+
; FLATSCR_GFX10-NEXT: flat_load_short_d16_hi v0, v[1:2]
440+
; FLATSCR_GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
441+
; FLATSCR_GFX10-NEXT: s_setpc_b64 s[30:31]
412442
;
413443
; GFX11-TRUE16-LABEL: chain_hi_to_lo_flat:
414444
; GFX11-TRUE16: ; %bb.0: ; %bb
415445
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 2
417-
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
418-
; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1]
446+
; GFX11-TRUE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
447+
; GFX11-TRUE16-NEXT: flat_load_d16_b16 v0, v[0:1] offset:2
419448
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, 0
420449
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v2, 0
421450
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -426,17 +455,16 @@ define <2 x half> @chain_hi_to_lo_flat() null_pointer_is_valid {
426455
; GFX11-FAKE16-LABEL: chain_hi_to_lo_flat:
427456
; GFX11-FAKE16: ; %bb.0: ; %bb
428457
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
429-
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 2
430-
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
431-
; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1]
458+
; GFX11-FAKE16-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
459+
; GFX11-FAKE16-NEXT: flat_load_u16 v0, v[0:1] offset:2
432460
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, 0
433461
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v2, 0
434462
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
435463
; GFX11-FAKE16-NEXT: flat_load_d16_hi_b16 v0, v[1:2]
436464
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
437465
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
438466
bb:
439-
%gep_lo = getelementptr inbounds half, ptr null, i64 1
467+
%gep_lo = getelementptr inbounds half, ptr %ptr, i64 1
440468
%load_lo = load half, ptr %gep_lo
441469
%load_hi = load half, ptr null
442470

llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll

Lines changed: 100 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -3,138 +3,149 @@
33

44
%"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }
55

6-
define void @issue63986(i64 %0, i64 %idxprom) null_pointer_is_valid {
6+
define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) {
77
; CHECK-LABEL: issue63986:
88
; CHECK: ; %bb.0: ; %entry
99
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10-
; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3]
10+
; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3]
11+
; CHECK-NEXT: v_mov_b32_e32 v4, s17
12+
; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8
13+
; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc
14+
; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge
15+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
16+
; CHECK-NEXT: v_mov_b32_e32 v5, 0
17+
; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5]
1118
; CHECK-NEXT: s_mov_b64 s[4:5], 0
12-
; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion
19+
; CHECK-NEXT: s_waitcnt vmcnt(0)
20+
; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion
1321
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
14-
; CHECK-NEXT: v_mov_b32_e32 v7, s5
15-
; CHECK-NEXT: v_mov_b32_e32 v6, s4
16-
; CHECK-NEXT: flat_load_dwordx4 v[6:9], v[6:7]
17-
; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s4, v4
22+
; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10
1823
; CHECK-NEXT: s_add_u32 s4, s4, 16
19-
; CHECK-NEXT: v_mov_b32_e32 v11, s5
24+
; CHECK-NEXT: v_mov_b32_e32 v13, s5
2025
; CHECK-NEXT: s_addc_u32 s5, s5, 0
2126
; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
22-
; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v5, v11, vcc
27+
; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v11, v13, vcc
2328
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
24-
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
25-
; CHECK-NEXT: flat_store_dwordx4 v[10:11], v[6:9]
26-
; CHECK-NEXT: s_cbranch_vccz .LBB0_1
27-
; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header
28-
; CHECK-NEXT: s_branch .LBB0_4
29-
; CHECK-NEXT: ; %bb.3:
30-
; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7
29+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
30+
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7]
31+
; CHECK-NEXT: s_cbranch_vccz .LBB0_2
32+
; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header
3133
; CHECK-NEXT: s_branch .LBB0_5
32-
; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
33-
; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3]
34-
; CHECK-NEXT: s_cbranch_execnz .LBB0_7
35-
; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader
36-
; CHECK-NEXT: v_or_b32_e32 v2, 32, v4
37-
; CHECK-NEXT: v_mov_b32_e32 v3, v5
34+
; CHECK-NEXT: ; %bb.4:
35+
; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3
36+
; CHECK-NEXT: s_branch .LBB0_6
37+
; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge
38+
; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3]
39+
; CHECK-NEXT: s_cbranch_execnz .LBB0_9
40+
; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge
41+
; CHECK-NEXT: v_mov_b32_e32 v2, 0
42+
; CHECK-NEXT: v_mov_b32_e32 v3, 0
43+
; CHECK-NEXT: flat_load_ubyte v2, v[2:3]
44+
; CHECK-NEXT: s_add_u32 s6, s16, 32
45+
; CHECK-NEXT: s_addc_u32 s4, s17, 0
46+
; CHECK-NEXT: v_mov_b32_e32 v4, s4
47+
; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8
3848
; CHECK-NEXT: s_mov_b64 s[4:5], 0
39-
; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual
40-
; CHECK-NEXT: s_add_u32 s6, 32, s4
41-
; CHECK-NEXT: s_addc_u32 s7, 0, s5
42-
; CHECK-NEXT: v_mov_b32_e32 v6, s6
43-
; CHECK-NEXT: v_mov_b32_e32 v7, s7
44-
; CHECK-NEXT: flat_load_ubyte v10, v[6:7]
45-
; CHECK-NEXT: v_mov_b32_e32 v9, s5
46-
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s4, v2
47-
; CHECK-NEXT: v_mov_b32_e32 v7, v5
48-
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc
49+
; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc
50+
; CHECK-NEXT: s_waitcnt vmcnt(0)
51+
; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual
52+
; CHECK-NEXT: v_mov_b32_e32 v6, s5
53+
; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3
4954
; CHECK-NEXT: s_add_u32 s4, s4, 1
50-
; CHECK-NEXT: v_mov_b32_e32 v6, v4
55+
; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc
5156
; CHECK-NEXT: s_addc_u32 s5, s5, 0
52-
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
53-
; CHECK-NEXT: flat_store_byte v[8:9], v10
54-
; CHECK-NEXT: .LBB0_7: ; %post-loop-memcpy-expansion
55-
; CHECK-NEXT: v_and_b32_e32 v2, 15, v0
56-
; CHECK-NEXT: v_mov_b32_e32 v3, 0
57+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
58+
; CHECK-NEXT: flat_store_byte v[5:6], v2
59+
; CHECK-NEXT: ; %bb.8:
60+
; CHECK-NEXT: v_mov_b32_e32 v2, v8
61+
; CHECK-NEXT: v_mov_b32_e32 v3, v9
62+
; CHECK-NEXT: .LBB0_9: ; %post-loop-memcpy-expansion
63+
; CHECK-NEXT: v_and_b32_e32 v6, 15, v0
5764
; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
65+
; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v2, v0
66+
; CHECK-NEXT: v_mov_b32_e32 v7, 0
67+
; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v1, vcc
5868
; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
59-
; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3]
60-
; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
61-
; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v1, vcc
62-
; CHECK-NEXT: s_branch .LBB0_10
63-
; CHECK-NEXT: .LBB0_8: ; %Flow14
64-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
69+
; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7]
70+
; CHECK-NEXT: v_mov_b32_e32 v4, s17
71+
; CHECK-NEXT: v_mov_b32_e32 v8, 0
72+
; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2
73+
; CHECK-NEXT: v_mov_b32_e32 v9, 0
74+
; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc
75+
; CHECK-NEXT: s_branch .LBB0_12
76+
; CHECK-NEXT: .LBB0_10: ; %Flow14
77+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
6578
; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
6679
; CHECK-NEXT: s_mov_b64 s[8:9], 0
67-
; CHECK-NEXT: .LBB0_9: ; %Flow16
68-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
80+
; CHECK-NEXT: .LBB0_11: ; %Flow16
81+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
6982
; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
70-
; CHECK-NEXT: s_cbranch_vccz .LBB0_18
71-
; CHECK-NEXT: .LBB0_10: ; %while.cond
83+
; CHECK-NEXT: s_cbranch_vccz .LBB0_20
84+
; CHECK-NEXT: .LBB0_12: ; %while.cond
7285
; CHECK-NEXT: ; =>This Loop Header: Depth=1
73-
; CHECK-NEXT: ; Child Loop BB0_12 Depth 2
74-
; CHECK-NEXT: ; Child Loop BB0_16 Depth 2
86+
; CHECK-NEXT: ; Child Loop BB0_14 Depth 2
87+
; CHECK-NEXT: ; Child Loop BB0_18 Depth 2
7588
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
76-
; CHECK-NEXT: s_cbranch_execz .LBB0_13
77-
; CHECK-NEXT: ; %bb.11: ; %loop-memcpy-expansion2.preheader
78-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
89+
; CHECK-NEXT: s_cbranch_execz .LBB0_15
90+
; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge
91+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
92+
; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9]
7993
; CHECK-NEXT: s_mov_b64 s[10:11], 0
8094
; CHECK-NEXT: s_mov_b64 s[12:13], 0
81-
; CHECK-NEXT: .LBB0_12: ; %loop-memcpy-expansion2
82-
; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
95+
; CHECK-NEXT: s_waitcnt vmcnt(0)
96+
; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2
97+
; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
8398
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
84-
; CHECK-NEXT: v_mov_b32_e32 v8, s12
85-
; CHECK-NEXT: v_mov_b32_e32 v9, s13
86-
; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[8:9]
87-
; CHECK-NEXT: v_mov_b32_e32 v13, s13
88-
; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s12, v4
99+
; CHECK-NEXT: v_mov_b32_e32 v15, s13
100+
; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10
89101
; CHECK-NEXT: s_add_u32 s12, s12, 16
90-
; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v5, v13, vcc
102+
; CHECK-NEXT: v_addc_co_u32_e32 v15, vcc, v11, v15, vcc
91103
; CHECK-NEXT: s_addc_u32 s13, s13, 0
92104
; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
105+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
106+
; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5]
93107
; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
94-
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
95-
; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11]
96108
; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
97-
; CHECK-NEXT: s_cbranch_execnz .LBB0_12
98-
; CHECK-NEXT: .LBB0_13: ; %Flow15
99-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
109+
; CHECK-NEXT: s_cbranch_execnz .LBB0_14
110+
; CHECK-NEXT: .LBB0_15: ; %Flow15
111+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
100112
; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
101113
; CHECK-NEXT: s_mov_b64 s[8:9], -1
102-
; CHECK-NEXT: s_cbranch_execz .LBB0_9
103-
; CHECK-NEXT: ; %bb.14: ; %loop-memcpy-residual-header5
104-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
114+
; CHECK-NEXT: s_cbranch_execz .LBB0_11
115+
; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual-header5
116+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
105117
; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
106118
; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
107-
; CHECK-NEXT: s_cbranch_execz .LBB0_8
108-
; CHECK-NEXT: ; %bb.15: ; %loop-memcpy-residual4.preheader
109-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
119+
; CHECK-NEXT: s_cbranch_execz .LBB0_10
120+
; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge
121+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
122+
; CHECK-NEXT: flat_load_ubyte v2, v[8:9]
110123
; CHECK-NEXT: s_mov_b64 s[12:13], 0
111124
; CHECK-NEXT: s_mov_b64 s[14:15], 0
112-
; CHECK-NEXT: .LBB0_16: ; %loop-memcpy-residual4
113-
; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1
125+
; CHECK-NEXT: s_waitcnt vmcnt(0)
126+
; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4
127+
; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1
114128
; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
115-
; CHECK-NEXT: v_mov_b32_e32 v10, s15
116-
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v0
117-
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v10, vcc
118-
; CHECK-NEXT: flat_load_ubyte v11, v[8:9]
119-
; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v6
129+
; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12
120130
; CHECK-NEXT: s_add_u32 s14, s14, 1
131+
; CHECK-NEXT: v_mov_b32_e32 v4, s15
121132
; CHECK-NEXT: s_addc_u32 s15, s15, 0
122-
; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3]
123-
; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v7, v10, vcc
133+
; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7]
134+
; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v4, vcc
124135
; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
125-
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
126-
; CHECK-NEXT: flat_store_byte v[8:9], v11
136+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
137+
; CHECK-NEXT: flat_store_byte v[3:4], v2
127138
; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
128-
; CHECK-NEXT: s_cbranch_execnz .LBB0_16
129-
; CHECK-NEXT: ; %bb.17: ; %Flow
130-
; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1
139+
; CHECK-NEXT: s_cbranch_execnz .LBB0_18
140+
; CHECK-NEXT: ; %bb.19: ; %Flow
141+
; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1
131142
; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
132-
; CHECK-NEXT: s_branch .LBB0_8
133-
; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock
143+
; CHECK-NEXT: s_branch .LBB0_10
144+
; CHECK-NEXT: .LBB0_20: ; %DummyReturnBlock
134145
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
135146
; CHECK-NEXT: s_setpc_b64 s[30:31]
136147
entry:
137-
%arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr null, i64 0, i64 %idxprom
148+
%arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr %ptr, i64 0, i64 %idxprom
138149
%spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
139150
tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %spec.select, i1 false)
140151
br label %while.cond

0 commit comments

Comments
 (0)