|
3 | 3 |
|
4 | 4 | %"struct.__llvm_libc::rpc::Buffer" = type { [8 x i64] }
|
5 | 5 |
|
6 |
| -define void @issue63986(i64 %0, i64 %idxprom) null_pointer_is_valid { |
| 6 | +define void @issue63986(i64 %0, i64 %idxprom, ptr inreg %ptr) { |
7 | 7 | ; CHECK-LABEL: issue63986:
|
8 | 8 | ; CHECK: ; %bb.0: ; %entry
|
9 | 9 | ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
10 |
| -; CHECK-NEXT: v_lshlrev_b64 v[4:5], 6, v[2:3] |
| 10 | +; CHECK-NEXT: v_lshlrev_b64 v[8:9], 6, v[2:3] |
| 11 | +; CHECK-NEXT: v_mov_b32_e32 v4, s17 |
| 12 | +; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s16, v8 |
| 13 | +; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v4, v9, vcc |
| 14 | +; CHECK-NEXT: ; %bb.1: ; %entry.loop-memcpy-expansion_crit_edge |
| 15 | +; CHECK-NEXT: v_mov_b32_e32 v4, 0 |
| 16 | +; CHECK-NEXT: v_mov_b32_e32 v5, 0 |
| 17 | +; CHECK-NEXT: flat_load_dwordx4 v[4:7], v[4:5] |
11 | 18 | ; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
12 |
| -; CHECK-NEXT: .LBB0_1: ; %loop-memcpy-expansion |
| 19 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 20 | +; CHECK-NEXT: .LBB0_2: ; %loop-memcpy-expansion |
13 | 21 | ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
|
14 |
| -; CHECK-NEXT: v_mov_b32_e32 v7, s5 |
15 |
| -; CHECK-NEXT: v_mov_b32_e32 v6, s4 |
16 |
| -; CHECK-NEXT: flat_load_dwordx4 v[6:9], v[6:7] |
17 |
| -; CHECK-NEXT: v_add_co_u32_e32 v10, vcc, s4, v4 |
| 22 | +; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s4, v10 |
18 | 23 | ; CHECK-NEXT: s_add_u32 s4, s4, 16
|
19 |
| -; CHECK-NEXT: v_mov_b32_e32 v11, s5 |
| 24 | +; CHECK-NEXT: v_mov_b32_e32 v13, s5 |
20 | 25 | ; CHECK-NEXT: s_addc_u32 s5, s5, 0
|
21 | 26 | ; CHECK-NEXT: v_cmp_ge_u64_e64 s[6:7], s[4:5], 32
|
22 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v11, vcc, v5, v11, vcc |
| 27 | +; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v11, v13, vcc |
23 | 28 | ; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
|
24 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
25 |
| -; CHECK-NEXT: flat_store_dwordx4 v[10:11], v[6:9] |
26 |
| -; CHECK-NEXT: s_cbranch_vccz .LBB0_1 |
27 |
| -; CHECK-NEXT: ; %bb.2: ; %loop-memcpy-residual-header |
28 |
| -; CHECK-NEXT: s_branch .LBB0_4 |
29 |
| -; CHECK-NEXT: ; %bb.3: |
30 |
| -; CHECK-NEXT: ; implicit-def: $vgpr6_vgpr7 |
| 29 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 30 | +; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[4:7] |
| 31 | +; CHECK-NEXT: s_cbranch_vccz .LBB0_2 |
| 32 | +; CHECK-NEXT: ; %bb.3: ; %loop-memcpy-residual-header |
31 | 33 | ; CHECK-NEXT: s_branch .LBB0_5
|
32 |
| -; CHECK-NEXT: .LBB0_4: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge |
33 |
| -; CHECK-NEXT: v_lshlrev_b64 v[6:7], 6, v[2:3] |
34 |
| -; CHECK-NEXT: s_cbranch_execnz .LBB0_7 |
35 |
| -; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual.preheader |
36 |
| -; CHECK-NEXT: v_or_b32_e32 v2, 32, v4 |
37 |
| -; CHECK-NEXT: v_mov_b32_e32 v3, v5 |
| 34 | +; CHECK-NEXT: ; %bb.4: |
| 35 | +; CHECK-NEXT: ; implicit-def: $vgpr2_vgpr3 |
| 36 | +; CHECK-NEXT: s_branch .LBB0_6 |
| 37 | +; CHECK-NEXT: .LBB0_5: ; %loop-memcpy-residual-header.post-loop-memcpy-expansion_crit_edge |
| 38 | +; CHECK-NEXT: v_lshlrev_b64 v[2:3], 6, v[2:3] |
| 39 | +; CHECK-NEXT: s_cbranch_execnz .LBB0_9 |
| 40 | +; CHECK-NEXT: .LBB0_6: ; %loop-memcpy-residual-header.loop-memcpy-residual_crit_edge |
| 41 | +; CHECK-NEXT: v_mov_b32_e32 v2, 0 |
| 42 | +; CHECK-NEXT: v_mov_b32_e32 v3, 0 |
| 43 | +; CHECK-NEXT: flat_load_ubyte v2, v[2:3] |
| 44 | +; CHECK-NEXT: s_add_u32 s6, s16, 32 |
| 45 | +; CHECK-NEXT: s_addc_u32 s4, s17, 0 |
| 46 | +; CHECK-NEXT: v_mov_b32_e32 v4, s4 |
| 47 | +; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s6, v8 |
38 | 48 | ; CHECK-NEXT: s_mov_b64 s[4:5], 0
|
39 |
| -; CHECK-NEXT: ; %bb.6: ; %loop-memcpy-residual |
40 |
| -; CHECK-NEXT: s_add_u32 s6, 32, s4 |
41 |
| -; CHECK-NEXT: s_addc_u32 s7, 0, s5 |
42 |
| -; CHECK-NEXT: v_mov_b32_e32 v6, s6 |
43 |
| -; CHECK-NEXT: v_mov_b32_e32 v7, s7 |
44 |
| -; CHECK-NEXT: flat_load_ubyte v10, v[6:7] |
45 |
| -; CHECK-NEXT: v_mov_b32_e32 v9, s5 |
46 |
| -; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s4, v2 |
47 |
| -; CHECK-NEXT: v_mov_b32_e32 v7, v5 |
48 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v3, v9, vcc |
| 49 | +; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v9, vcc |
| 50 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 51 | +; CHECK-NEXT: ; %bb.7: ; %loop-memcpy-residual |
| 52 | +; CHECK-NEXT: v_mov_b32_e32 v6, s5 |
| 53 | +; CHECK-NEXT: v_add_co_u32_e32 v5, vcc, s4, v3 |
49 | 54 | ; CHECK-NEXT: s_add_u32 s4, s4, 1
|
50 |
| -; CHECK-NEXT: v_mov_b32_e32 v6, v4 |
| 55 | +; CHECK-NEXT: v_addc_co_u32_e32 v6, vcc, v4, v6, vcc |
51 | 56 | ; CHECK-NEXT: s_addc_u32 s5, s5, 0
|
52 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
53 |
| -; CHECK-NEXT: flat_store_byte v[8:9], v10 |
54 |
| -; CHECK-NEXT: .LBB0_7: ; %post-loop-memcpy-expansion |
55 |
| -; CHECK-NEXT: v_and_b32_e32 v2, 15, v0 |
56 |
| -; CHECK-NEXT: v_mov_b32_e32 v3, 0 |
| 57 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 58 | +; CHECK-NEXT: flat_store_byte v[5:6], v2 |
| 59 | +; CHECK-NEXT: ; %bb.8: |
| 60 | +; CHECK-NEXT: v_mov_b32_e32 v2, v8 |
| 61 | +; CHECK-NEXT: v_mov_b32_e32 v3, v9 |
| 62 | +; CHECK-NEXT: .LBB0_9: ; %post-loop-memcpy-expansion |
| 63 | +; CHECK-NEXT: v_and_b32_e32 v6, 15, v0 |
57 | 64 | ; CHECK-NEXT: v_and_b32_e32 v0, -16, v0
|
| 65 | +; CHECK-NEXT: v_add_co_u32_e32 v2, vcc, v2, v0 |
| 66 | +; CHECK-NEXT: v_mov_b32_e32 v7, 0 |
| 67 | +; CHECK-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v1, vcc |
58 | 68 | ; CHECK-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[0:1]
|
59 |
| -; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[2:3] |
60 |
| -; CHECK-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0 |
61 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v7, vcc, v7, v1, vcc |
62 |
| -; CHECK-NEXT: s_branch .LBB0_10 |
63 |
| -; CHECK-NEXT: .LBB0_8: ; %Flow14 |
64 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 69 | +; CHECK-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[6:7] |
| 70 | +; CHECK-NEXT: v_mov_b32_e32 v4, s17 |
| 71 | +; CHECK-NEXT: v_mov_b32_e32 v8, 0 |
| 72 | +; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s16, v2 |
| 73 | +; CHECK-NEXT: v_mov_b32_e32 v9, 0 |
| 74 | +; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v4, v3, vcc |
| 75 | +; CHECK-NEXT: s_branch .LBB0_12 |
| 76 | +; CHECK-NEXT: .LBB0_10: ; %Flow14 |
| 77 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
65 | 78 | ; CHECK-NEXT: s_or_b64 exec, exec, s[10:11]
|
66 | 79 | ; CHECK-NEXT: s_mov_b64 s[8:9], 0
|
67 |
| -; CHECK-NEXT: .LBB0_9: ; %Flow16 |
68 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 80 | +; CHECK-NEXT: .LBB0_11: ; %Flow16 |
| 81 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
69 | 82 | ; CHECK-NEXT: s_andn2_b64 vcc, exec, s[8:9]
|
70 |
| -; CHECK-NEXT: s_cbranch_vccz .LBB0_18 |
71 |
| -; CHECK-NEXT: .LBB0_10: ; %while.cond |
| 83 | +; CHECK-NEXT: s_cbranch_vccz .LBB0_20 |
| 84 | +; CHECK-NEXT: .LBB0_12: ; %while.cond |
72 | 85 | ; CHECK-NEXT: ; =>This Loop Header: Depth=1
|
73 |
| -; CHECK-NEXT: ; Child Loop BB0_12 Depth 2 |
74 |
| -; CHECK-NEXT: ; Child Loop BB0_16 Depth 2 |
| 86 | +; CHECK-NEXT: ; Child Loop BB0_14 Depth 2 |
| 87 | +; CHECK-NEXT: ; Child Loop BB0_18 Depth 2 |
75 | 88 | ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
|
76 |
| -; CHECK-NEXT: s_cbranch_execz .LBB0_13 |
77 |
| -; CHECK-NEXT: ; %bb.11: ; %loop-memcpy-expansion2.preheader |
78 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 89 | +; CHECK-NEXT: s_cbranch_execz .LBB0_15 |
| 90 | +; CHECK-NEXT: ; %bb.13: ; %while.cond.loop-memcpy-expansion2_crit_edge |
| 91 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
| 92 | +; CHECK-NEXT: flat_load_dwordx4 v[2:5], v[8:9] |
79 | 93 | ; CHECK-NEXT: s_mov_b64 s[10:11], 0
|
80 | 94 | ; CHECK-NEXT: s_mov_b64 s[12:13], 0
|
81 |
| -; CHECK-NEXT: .LBB0_12: ; %loop-memcpy-expansion2 |
82 |
| -; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1 |
| 95 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 96 | +; CHECK-NEXT: .LBB0_14: ; %loop-memcpy-expansion2 |
| 97 | +; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 |
83 | 98 | ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
|
84 |
| -; CHECK-NEXT: v_mov_b32_e32 v8, s12 |
85 |
| -; CHECK-NEXT: v_mov_b32_e32 v9, s13 |
86 |
| -; CHECK-NEXT: flat_load_dwordx4 v[8:11], v[8:9] |
87 |
| -; CHECK-NEXT: v_mov_b32_e32 v13, s13 |
88 |
| -; CHECK-NEXT: v_add_co_u32_e32 v12, vcc, s12, v4 |
| 99 | +; CHECK-NEXT: v_mov_b32_e32 v15, s13 |
| 100 | +; CHECK-NEXT: v_add_co_u32_e32 v14, vcc, s12, v10 |
89 | 101 | ; CHECK-NEXT: s_add_u32 s12, s12, 16
|
90 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v13, vcc, v5, v13, vcc |
| 102 | +; CHECK-NEXT: v_addc_co_u32_e32 v15, vcc, v11, v15, vcc |
91 | 103 | ; CHECK-NEXT: s_addc_u32 s13, s13, 0
|
92 | 104 | ; CHECK-NEXT: v_cmp_ge_u64_e32 vcc, s[12:13], v[0:1]
|
| 105 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 106 | +; CHECK-NEXT: flat_store_dwordx4 v[14:15], v[2:5] |
93 | 107 | ; CHECK-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
|
94 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
95 |
| -; CHECK-NEXT: flat_store_dwordx4 v[12:13], v[8:11] |
96 | 108 | ; CHECK-NEXT: s_andn2_b64 exec, exec, s[10:11]
|
97 |
| -; CHECK-NEXT: s_cbranch_execnz .LBB0_12 |
98 |
| -; CHECK-NEXT: .LBB0_13: ; %Flow15 |
99 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 109 | +; CHECK-NEXT: s_cbranch_execnz .LBB0_14 |
| 110 | +; CHECK-NEXT: .LBB0_15: ; %Flow15 |
| 111 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
100 | 112 | ; CHECK-NEXT: s_or_b64 exec, exec, s[8:9]
|
101 | 113 | ; CHECK-NEXT: s_mov_b64 s[8:9], -1
|
102 |
| -; CHECK-NEXT: s_cbranch_execz .LBB0_9 |
103 |
| -; CHECK-NEXT: ; %bb.14: ; %loop-memcpy-residual-header5 |
104 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 114 | +; CHECK-NEXT: s_cbranch_execz .LBB0_11 |
| 115 | +; CHECK-NEXT: ; %bb.16: ; %loop-memcpy-residual-header5 |
| 116 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
105 | 117 | ; CHECK-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
|
106 | 118 | ; CHECK-NEXT: s_xor_b64 s[10:11], exec, s[8:9]
|
107 |
| -; CHECK-NEXT: s_cbranch_execz .LBB0_8 |
108 |
| -; CHECK-NEXT: ; %bb.15: ; %loop-memcpy-residual4.preheader |
109 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 119 | +; CHECK-NEXT: s_cbranch_execz .LBB0_10 |
| 120 | +; CHECK-NEXT: ; %bb.17: ; %loop-memcpy-residual-header5.loop-memcpy-residual4_crit_edge |
| 121 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
| 122 | +; CHECK-NEXT: flat_load_ubyte v2, v[8:9] |
110 | 123 | ; CHECK-NEXT: s_mov_b64 s[12:13], 0
|
111 | 124 | ; CHECK-NEXT: s_mov_b64 s[14:15], 0
|
112 |
| -; CHECK-NEXT: .LBB0_16: ; %loop-memcpy-residual4 |
113 |
| -; CHECK-NEXT: ; Parent Loop BB0_10 Depth=1 |
| 125 | +; CHECK-NEXT: s_waitcnt vmcnt(0) |
| 126 | +; CHECK-NEXT: .LBB0_18: ; %loop-memcpy-residual4 |
| 127 | +; CHECK-NEXT: ; Parent Loop BB0_12 Depth=1 |
114 | 128 | ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2
|
115 |
| -; CHECK-NEXT: v_mov_b32_e32 v10, s15 |
116 |
| -; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v0 |
117 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v10, vcc |
118 |
| -; CHECK-NEXT: flat_load_ubyte v11, v[8:9] |
119 |
| -; CHECK-NEXT: v_add_co_u32_e32 v8, vcc, s14, v6 |
| 129 | +; CHECK-NEXT: v_add_co_u32_e32 v3, vcc, s14, v12 |
120 | 130 | ; CHECK-NEXT: s_add_u32 s14, s14, 1
|
| 131 | +; CHECK-NEXT: v_mov_b32_e32 v4, s15 |
121 | 132 | ; CHECK-NEXT: s_addc_u32 s15, s15, 0
|
122 |
| -; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[2:3] |
123 |
| -; CHECK-NEXT: v_addc_co_u32_e32 v9, vcc, v7, v10, vcc |
| 133 | +; CHECK-NEXT: v_cmp_ge_u64_e64 s[8:9], s[14:15], v[6:7] |
| 134 | +; CHECK-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v4, vcc |
124 | 135 | ; CHECK-NEXT: s_or_b64 s[12:13], s[8:9], s[12:13]
|
125 |
| -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) |
126 |
| -; CHECK-NEXT: flat_store_byte v[8:9], v11 |
| 136 | +; CHECK-NEXT: s_waitcnt lgkmcnt(0) |
| 137 | +; CHECK-NEXT: flat_store_byte v[3:4], v2 |
127 | 138 | ; CHECK-NEXT: s_andn2_b64 exec, exec, s[12:13]
|
128 |
| -; CHECK-NEXT: s_cbranch_execnz .LBB0_16 |
129 |
| -; CHECK-NEXT: ; %bb.17: ; %Flow |
130 |
| -; CHECK-NEXT: ; in Loop: Header=BB0_10 Depth=1 |
| 139 | +; CHECK-NEXT: s_cbranch_execnz .LBB0_18 |
| 140 | +; CHECK-NEXT: ; %bb.19: ; %Flow |
| 141 | +; CHECK-NEXT: ; in Loop: Header=BB0_12 Depth=1 |
131 | 142 | ; CHECK-NEXT: s_or_b64 exec, exec, s[12:13]
|
132 |
| -; CHECK-NEXT: s_branch .LBB0_8 |
133 |
| -; CHECK-NEXT: .LBB0_18: ; %DummyReturnBlock |
| 143 | +; CHECK-NEXT: s_branch .LBB0_10 |
| 144 | +; CHECK-NEXT: .LBB0_20: ; %DummyReturnBlock |
134 | 145 | ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
|
135 | 146 | ; CHECK-NEXT: s_setpc_b64 s[30:31]
|
136 | 147 | entry:
|
137 |
| - %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr null, i64 0, i64 %idxprom |
| 148 | + %arrayidx = getelementptr [32 x %"struct.__llvm_libc::rpc::Buffer"], ptr %ptr, i64 0, i64 %idxprom |
138 | 149 | %spec.select = tail call i64 @llvm.umin.i64(i64 sub (i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) inttoptr (i64 32 to ptr addrspace(4)) to ptr) to i64), i64 ptrtoint (ptr addrspacecast (ptr addrspace(4) null to ptr) to i64)), i64 56)
|
139 | 150 | tail call void @llvm.memcpy.p0.p0.i64(ptr %arrayidx, ptr null, i64 %spec.select, i1 false)
|
140 | 151 | br label %while.cond
|
|
0 commit comments