|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX940 %s |
| 3 | + |
; Kernel under test: all five kernel arguments are marked inreg, and the first
; one (after a ptrtoint/inttoptr round trip) is described by a #dbg_value.
; The GFX940 assertions below are autogenerated (see the NOTE at the top of the
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand. They record the DEBUG_VALUE for test:var as
; [DW_OP_LLVM_poisoned] $sgpr2_sgpr3.
| 4 | +define amdgpu_kernel void @preload_block_count_x(ptr addrspace(1) inreg noundef %dst.coerce, ptr addrspace(1) inreg noundef %src.coerce, i64 inreg noundef %nElts, i64 inreg noundef %redOpArg, i1 inreg noundef %redOpArgIsPtr) #0 !dbg !4 { |
| 5 | +; GFX940-LABEL: preload_block_count_x: |
| 6 | +; GFX940: .Lfunc_begin0: |
| 7 | +; GFX940-NEXT: .file 0 "/" "<stdin>" |
| 8 | +; GFX940-NEXT: .cfi_sections .debug_frame |
| 9 | +; GFX940-NEXT: .cfi_startproc |
| 10 | +; GFX940-NEXT: ; %bb.5: |
| 11 | +; GFX940-NEXT: .loc 0 1 0 prologue_end ; <stdin>:1:0 |
| 12 | +; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 |
| 13 | +; GFX940-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x8 |
| 14 | +; GFX940-NEXT: s_load_dword s12, s[0:1], 0x28 |
| 15 | +; GFX940-NEXT: s_waitcnt lgkmcnt(0) |
| 16 | +; GFX940-NEXT: s_branch .LBB0_0 |
| 17 | +; GFX940-NEXT: .loc 0 0 0 is_stmt 0 ; :0:0 |
| 18 | +; GFX940-NEXT: .Ltmp0: |
| 19 | +; GFX940-NEXT: .p2align 8 |
| 20 | +; GFX940-NEXT: ; %bb.6: |
| 21 | +; GFX940-NEXT: .LBB0_0: ; %entry |
| 22 | +; GFX940-NEXT: .cfi_escape 0x0f, 0x04, 0x30, 0x36, 0xe9, 0x02 ; |
| 23 | +; GFX940-NEXT: .cfi_undefined 16 |
| 24 | +; GFX940-NEXT: s_mov_b32 s0, s13 |
| 25 | +; GFX940-NEXT: .Ltmp1: |
| 26 | +; GFX940-NEXT: ;DEBUG_VALUE: test:var <- [DW_OP_LLVM_poisoned] $sgpr2_sgpr3 |
| 27 | +; GFX940-NEXT: .loc 0 1 0 is_stmt 1 ; <stdin>:1:0 |
| 28 | +; GFX940-NEXT: s_ashr_i32 s13, s12, 31 |
| 29 | +; GFX940-NEXT: s_or_b64 s[8:9], s[6:7], s[12:13] |
| 30 | +; GFX940-NEXT: s_mov_b32 s8, 0 |
| 31 | +; GFX940-NEXT: s_cmp_lg_u64 s[8:9], 0 |
| 32 | +; GFX940-NEXT: s_cbranch_scc0 .LBB0_4 |
| 33 | +; GFX940-NEXT: .Ltmp2: |
| 34 | +; GFX940-NEXT: ; %bb.1: |
| 35 | +; GFX940-NEXT: ;DEBUG_VALUE: test:var <- [DW_OP_LLVM_poisoned] $sgpr2_sgpr3 |
| 36 | +; GFX940-NEXT: v_cvt_f32_u32_e32 v0, s12 |
| 37 | +; GFX940-NEXT: v_cvt_f32_u32_e32 v1, s13 |
| 38 | +; GFX940-NEXT: s_sub_u32 s1, 0, s12 |
| 39 | +; GFX940-NEXT: s_subb_u32 s3, 0, s13 |
| 40 | +; GFX940-NEXT: .Ltmp3: |
| 41 | +; GFX940-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0 |
| 42 | +; GFX940-NEXT: v_rcp_f32_e32 v0, v0 |
| 43 | +; GFX940-NEXT: s_nop 0 |
| 44 | +; GFX940-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 |
| 45 | +; GFX940-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 |
| 46 | +; GFX940-NEXT: v_trunc_f32_e32 v1, v1 |
| 47 | +; GFX940-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0 |
| 48 | +; GFX940-NEXT: v_cvt_u32_f32_e32 v1, v1 |
| 49 | +; GFX940-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| 50 | +; GFX940-NEXT: v_readfirstlane_b32 s5, v1 |
| 51 | +; GFX940-NEXT: v_readfirstlane_b32 s8, v0 |
| 52 | +; GFX940-NEXT: s_mul_i32 s9, s1, s5 |
| 53 | +; GFX940-NEXT: s_mul_hi_u32 s15, s1, s8 |
| 54 | +; GFX940-NEXT: s_mul_i32 s14, s3, s8 |
| 55 | +; GFX940-NEXT: s_add_i32 s9, s15, s9 |
| 56 | +; GFX940-NEXT: s_add_i32 s9, s9, s14 |
| 57 | +; GFX940-NEXT: s_mul_i32 s16, s1, s8 |
| 58 | +; GFX940-NEXT: s_mul_hi_u32 s14, s8, s9 |
| 59 | +; GFX940-NEXT: s_mul_i32 s15, s8, s9 |
| 60 | +; GFX940-NEXT: s_mul_hi_u32 s8, s8, s16 |
| 61 | +; GFX940-NEXT: s_add_u32 s8, s8, s15 |
| 62 | +; GFX940-NEXT: s_addc_u32 s14, 0, s14 |
| 63 | +; GFX940-NEXT: s_mul_hi_u32 s17, s5, s16 |
| 64 | +; GFX940-NEXT: s_mul_i32 s16, s5, s16 |
| 65 | +; GFX940-NEXT: s_add_u32 s8, s8, s16 |
| 66 | +; GFX940-NEXT: s_mul_hi_u32 s15, s5, s9 |
| 67 | +; GFX940-NEXT: s_addc_u32 s8, s14, s17 |
| 68 | +; GFX940-NEXT: s_addc_u32 s14, s15, 0 |
| 69 | +; GFX940-NEXT: s_mul_i32 s9, s5, s9 |
| 70 | +; GFX940-NEXT: s_add_u32 s8, s8, s9 |
| 71 | +; GFX940-NEXT: s_addc_u32 s9, 0, s14 |
| 72 | +; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, s8, v0 |
| 73 | +; GFX940-NEXT: s_cmp_lg_u64 vcc, 0 |
| 74 | +; GFX940-NEXT: s_addc_u32 s5, s5, s9 |
| 75 | +; GFX940-NEXT: v_readfirstlane_b32 s9, v0 |
| 76 | +; GFX940-NEXT: s_mul_i32 s8, s1, s5 |
| 77 | +; GFX940-NEXT: s_mul_hi_u32 s14, s1, s9 |
| 78 | +; GFX940-NEXT: s_add_i32 s8, s14, s8 |
| 79 | +; GFX940-NEXT: s_mul_i32 s3, s3, s9 |
| 80 | +; GFX940-NEXT: s_add_i32 s8, s8, s3 |
| 81 | +; GFX940-NEXT: s_mul_i32 s1, s1, s9 |
| 82 | +; GFX940-NEXT: s_mul_hi_u32 s14, s5, s1 |
| 83 | +; GFX940-NEXT: s_mul_i32 s15, s5, s1 |
| 84 | +; GFX940-NEXT: s_mul_i32 s17, s9, s8 |
| 85 | +; GFX940-NEXT: s_mul_hi_u32 s1, s9, s1 |
| 86 | +; GFX940-NEXT: s_mul_hi_u32 s16, s9, s8 |
| 87 | +; GFX940-NEXT: s_add_u32 s1, s1, s17 |
| 88 | +; GFX940-NEXT: s_addc_u32 s9, 0, s16 |
| 89 | +; GFX940-NEXT: s_add_u32 s1, s1, s15 |
| 90 | +; GFX940-NEXT: s_mul_hi_u32 s3, s5, s8 |
| 91 | +; GFX940-NEXT: s_addc_u32 s1, s9, s14 |
| 92 | +; GFX940-NEXT: s_addc_u32 s3, s3, 0 |
| 93 | +; GFX940-NEXT: s_mul_i32 s8, s5, s8 |
| 94 | +; GFX940-NEXT: s_add_u32 s1, s1, s8 |
| 95 | +; GFX940-NEXT: s_addc_u32 s3, 0, s3 |
| 96 | +; GFX940-NEXT: v_add_co_u32_e32 v0, vcc, s1, v0 |
| 97 | +; GFX940-NEXT: s_cmp_lg_u64 vcc, 0 |
| 98 | +; GFX940-NEXT: s_addc_u32 s1, s5, s3 |
| 99 | +; GFX940-NEXT: v_readfirstlane_b32 s8, v0 |
| 100 | +; GFX940-NEXT: s_mul_i32 s5, s6, s1 |
| 101 | +; GFX940-NEXT: s_mul_hi_u32 s9, s6, s8 |
| 102 | +; GFX940-NEXT: s_mul_hi_u32 s3, s6, s1 |
| 103 | +; GFX940-NEXT: s_add_u32 s5, s9, s5 |
| 104 | +; GFX940-NEXT: s_addc_u32 s3, 0, s3 |
| 105 | +; GFX940-NEXT: s_mul_hi_u32 s14, s7, s8 |
| 106 | +; GFX940-NEXT: s_mul_i32 s8, s7, s8 |
| 107 | +; GFX940-NEXT: s_add_u32 s5, s5, s8 |
| 108 | +; GFX940-NEXT: s_mul_hi_u32 s9, s7, s1 |
| 109 | +; GFX940-NEXT: s_addc_u32 s3, s3, s14 |
| 110 | +; GFX940-NEXT: s_addc_u32 s5, s9, 0 |
| 111 | +; GFX940-NEXT: s_mul_i32 s1, s7, s1 |
| 112 | +; GFX940-NEXT: s_add_u32 s1, s3, s1 |
| 113 | +; GFX940-NEXT: s_addc_u32 s3, 0, s5 |
| 114 | +; GFX940-NEXT: s_mul_i32 s5, s12, s3 |
| 115 | +; GFX940-NEXT: s_mul_hi_u32 s8, s12, s1 |
| 116 | +; GFX940-NEXT: s_add_i32 s5, s8, s5 |
| 117 | +; GFX940-NEXT: s_mul_i32 s8, s13, s1 |
| 118 | +; GFX940-NEXT: s_mul_i32 s9, s12, s1 |
| 119 | +; GFX940-NEXT: s_add_i32 s5, s5, s8 |
| 120 | +; GFX940-NEXT: v_mov_b32_e32 v0, s9 |
| 121 | +; GFX940-NEXT: s_sub_i32 s8, s7, s5 |
| 122 | +; GFX940-NEXT: v_sub_co_u32_e32 v0, vcc, s6, v0 |
| 123 | +; GFX940-NEXT: s_cmp_lg_u64 vcc, 0 |
| 124 | +; GFX940-NEXT: s_subb_u32 s14, s8, s13 |
| 125 | +; GFX940-NEXT: v_subrev_co_u32_e64 v1, s[8:9], s12, v0 |
| 126 | +; GFX940-NEXT: s_cmp_lg_u64 s[8:9], 0 |
| 127 | +; GFX940-NEXT: s_subb_u32 s8, s14, 0 |
| 128 | +; GFX940-NEXT: s_cmp_ge_u32 s8, s13 |
| 129 | +; GFX940-NEXT: v_readfirstlane_b32 s14, v1 |
| 130 | +; GFX940-NEXT: s_cselect_b32 s9, -1, 0 |
| 131 | +; GFX940-NEXT: s_cmp_ge_u32 s14, s12 |
| 132 | +; GFX940-NEXT: s_cselect_b32 s14, -1, 0 |
| 133 | +; GFX940-NEXT: s_cmp_eq_u32 s8, s13 |
| 134 | +; GFX940-NEXT: s_cselect_b32 s8, s14, s9 |
| 135 | +; GFX940-NEXT: s_add_u32 s9, s1, 1 |
| 136 | +; GFX940-NEXT: s_addc_u32 s14, s3, 0 |
| 137 | +; GFX940-NEXT: s_add_u32 s15, s1, 2 |
| 138 | +; GFX940-NEXT: s_addc_u32 s16, s3, 0 |
| 139 | +; GFX940-NEXT: s_cmp_lg_u32 s8, 0 |
| 140 | +; GFX940-NEXT: s_cselect_b32 s8, s15, s9 |
| 141 | +; GFX940-NEXT: s_cselect_b32 s9, s16, s14 |
| 142 | +; GFX940-NEXT: s_cmp_lg_u64 vcc, 0 |
| 143 | +; GFX940-NEXT: s_subb_u32 s5, s7, s5 |
| 144 | +; GFX940-NEXT: s_cmp_ge_u32 s5, s13 |
| 145 | +; GFX940-NEXT: v_readfirstlane_b32 s15, v0 |
| 146 | +; GFX940-NEXT: s_cselect_b32 s14, -1, 0 |
| 147 | +; GFX940-NEXT: s_cmp_ge_u32 s15, s12 |
| 148 | +; GFX940-NEXT: s_cselect_b32 s15, -1, 0 |
| 149 | +; GFX940-NEXT: s_cmp_eq_u32 s5, s13 |
| 150 | +; GFX940-NEXT: s_cselect_b32 s5, s15, s14 |
| 151 | +; GFX940-NEXT: s_cmp_lg_u32 s5, 0 |
| 152 | +; GFX940-NEXT: s_cselect_b32 s9, s9, s3 |
| 153 | +; GFX940-NEXT: s_cselect_b32 s8, s8, s1 |
| 154 | +; GFX940-NEXT: s_cbranch_execnz .LBB0_3 |
| 155 | +; GFX940-NEXT: .LBB0_2: |
| 156 | +; GFX940-NEXT: v_cvt_f32_u32_e32 v0, s12 |
| 157 | +; GFX940-NEXT: s_sub_i32 s1, 0, s12 |
| 158 | +; GFX940-NEXT: s_mov_b32 s9, 0 |
| 159 | +; GFX940-NEXT: v_rcp_iflag_f32_e32 v0, v0 |
| 160 | +; GFX940-NEXT: s_nop 0 |
| 161 | +; GFX940-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 |
| 162 | +; GFX940-NEXT: v_cvt_u32_f32_e32 v0, v0 |
| 163 | +; GFX940-NEXT: s_nop 0 |
| 164 | +; GFX940-NEXT: v_readfirstlane_b32 s3, v0 |
| 165 | +; GFX940-NEXT: s_mul_i32 s1, s1, s3 |
| 166 | +; GFX940-NEXT: s_mul_hi_u32 s1, s3, s1 |
| 167 | +; GFX940-NEXT: s_add_i32 s3, s3, s1 |
| 168 | +; GFX940-NEXT: s_mul_hi_u32 s1, s6, s3 |
| 169 | +; GFX940-NEXT: s_mul_i32 s5, s1, s12 |
| 170 | +; GFX940-NEXT: s_sub_i32 s5, s6, s5 |
| 171 | +; GFX940-NEXT: s_add_i32 s3, s1, 1 |
| 172 | +; GFX940-NEXT: s_sub_i32 s8, s5, s12 |
| 173 | +; GFX940-NEXT: s_cmp_ge_u32 s5, s12 |
| 174 | +; GFX940-NEXT: s_cselect_b32 s1, s3, s1 |
| 175 | +; GFX940-NEXT: s_cselect_b32 s5, s8, s5 |
| 176 | +; GFX940-NEXT: s_add_i32 s3, s1, 1 |
| 177 | +; GFX940-NEXT: s_cmp_ge_u32 s5, s12 |
| 178 | +; GFX940-NEXT: s_cselect_b32 s8, s3, s1 |
| 179 | +; GFX940-NEXT: .LBB0_3: |
| 180 | +; GFX940-NEXT: s_ashr_i32 s1, s0, 31 |
| 181 | +; GFX940-NEXT: s_add_u32 s3, s8, 15 |
| 182 | +; GFX940-NEXT: s_addc_u32 s5, s9, 0 |
| 183 | +; GFX940-NEXT: s_and_b32 s3, s3, -16 |
| 184 | +; GFX940-NEXT: s_mul_i32 s1, s3, s1 |
| 185 | +; GFX940-NEXT: s_mul_hi_u32 s8, s3, s0 |
| 186 | +; GFX940-NEXT: s_add_i32 s1, s8, s1 |
| 187 | +; GFX940-NEXT: s_mul_i32 s5, s5, s0 |
| 188 | +; GFX940-NEXT: s_add_i32 s1, s1, s5 |
| 189 | +; GFX940-NEXT: s_mul_i32 s3, s3, s0 |
| 190 | +; GFX940-NEXT: v_cvt_f64_i32_e32 v[0:1], s1 |
| 191 | +; GFX940-NEXT: v_ldexp_f64 v[0:1], v[0:1], 32 |
| 192 | +; GFX940-NEXT: v_cvt_f64_u32_e32 v[2:3], s3 |
| 193 | +; GFX940-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] |
| 194 | +; GFX940-NEXT: v_cvt_f64_u32_e32 v[2:3], s7 |
| 195 | +; GFX940-NEXT: v_ldexp_f64 v[2:3], v[2:3], 32 |
| 196 | +; GFX940-NEXT: v_cvt_f64_u32_e32 v[4:5], s6 |
| 197 | +; GFX940-NEXT: v_add_f64 v[2:3], v[2:3], v[4:5] |
| 198 | +; GFX940-NEXT: v_min_f64 v[0:1], v[0:1], v[2:3] |
| 199 | +; GFX940-NEXT: v_trunc_f64_e32 v[0:1], v[0:1] |
| 200 | +; GFX940-NEXT: s_movk_i32 s0, 0xffe0 |
| 201 | +; GFX940-NEXT: v_ldexp_f64 v[2:3], v[0:1], s0 |
| 202 | +; GFX940-NEXT: v_floor_f64_e32 v[2:3], v[2:3] |
| 203 | +; GFX940-NEXT: v_fmac_f64_e32 v[0:1], 0xc1f00000, v[2:3] |
| 204 | +; GFX940-NEXT: v_cvt_u32_f64_e32 v0, v[0:1] |
| 205 | +; GFX940-NEXT: v_add_u32_e32 v1, s2, v0 |
| 206 | +; GFX940-NEXT: v_add_u32_e32 v0, s4, v0 |
| 207 | +; GFX940-NEXT: v_or_b32_e32 v0, v0, v1 |
| 208 | +; GFX940-NEXT: v_and_b32_e32 v0, 15, v0 |
| 209 | +; GFX940-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 |
| 210 | +; GFX940-NEXT: s_nop 1 |
| 211 | +; GFX940-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc |
| 212 | +; GFX940-NEXT: ;;#ASMSTART |
| 213 | +; GFX940-NEXT: ;;#ASMEND |
| 214 | +; GFX940-NEXT: s_endpgm |
| 215 | +; GFX940-NEXT: .LBB0_4: |
| 216 | +; GFX940-NEXT: .Ltmp4: |
| 217 | +; GFX940-NEXT: ;DEBUG_VALUE: test:var <- [DW_OP_LLVM_poisoned] $sgpr2_sgpr3 |
| 218 | +; GFX940-NEXT: ; implicit-def: $sgpr8_sgpr9 |
| 219 | +; GFX940-NEXT: s_branch .LBB0_2 |
| 220 | +; GFX940-NEXT: .Ltmp5: |
| 221 | +entry: |
; Round-trip the first argument through i64 to obtain a generic pointer (%1);
; %1 is the value the #dbg_value below binds to variable !8.
| 222 | + %0 = ptrtoint ptr addrspace(1) %dst.coerce to i64 |
| 223 | + %1 = inttoptr i64 %0 to ptr |
| 224 | + %2 = ptrtoint ptr addrspace(1) %src.coerce to i64 |
| 225 | + #dbg_value(ptr %1, !8, !DIExpression(DIOpArg(0, ptr)), !10) |
| 226 | + %3 = tail call noundef range(i32 0, 1024) i32 @llvm.amdgcn.workitem.id.x(), !dbg !10 |
; Read an i32 at offset 0 and an i16 at offset 12 from the implicit-argument
; pointer. NOTE(review): going by the function name, the offset-0 load is
; presumably the block count in x -- confirm against the implicit kernarg layout.
| 227 | + %4 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr(), !dbg !10 |
| 228 | + %5 = tail call i32 @llvm.amdgcn.workgroup.id.x(), !dbg !10 |
| 229 | + %6 = load i32, ptr addrspace(4) %4, align 4, !dbg !10 |
| 230 | + %7 = getelementptr inbounds nuw i8, ptr addrspace(4) %4, i64 12, !dbg !10 |
| 231 | + %8 = load i16, ptr addrspace(4) %7, align 4, !dbg !10 |
| 232 | + %conv.i.i = zext i16 %8 to i32, !dbg !10 |
| 233 | + %conv = sext i32 %5 to i64, !dbg !10 |
| 234 | + %conv6 = sext i32 %6 to i64, !dbg !10 |
; 64-bit unsigned division of %nElts by the sign-extended offset-0 value; the
; quotient is then rounded up to a multiple of 16 (add 15, mask with -16) and
; scaled by the workgroup id and by workgroup id + 1.
| 235 | + %div = udiv i64 %nElts, %conv6, !dbg !10 |
| 236 | + %sub.i = add i64 %div, 15, !dbg !10 |
| 237 | + %and.i = and i64 %sub.i, -16, !dbg !10 |
| 238 | + %mul = mul i64 %and.i, %conv, !dbg !10 |
| 239 | + %add8 = add nsw i32 %5, 1, !dbg !10 |
| 240 | + %conv9 = sext i32 %add8 to i64, !dbg !10 |
| 241 | + %mul13 = mul i64 %and.i, %conv9, !dbg !10 |
; Both products are clamped to %nElts through a double-precision minnum and
; converted back to i64.
| 242 | + %conv.i = sitofp i64 %mul to double, !dbg !10 |
| 243 | + %conv1.i = uitofp i64 %nElts to double, !dbg !10 |
| 244 | + %9 = tail call contract noundef double @llvm.minnum.f64(double %conv.i, double %conv1.i), !dbg !10 |
| 245 | + %conv15 = fptosi double %9 to i64, !dbg !10 |
| 246 | + %conv.i43 = sitofp i64 %mul13 to double, !dbg !10 |
| 247 | + %10 = tail call contract noundef double @llvm.minnum.f64(double %conv.i43, double %conv1.i), !dbg !10 |
| 248 | + %add.ptr18 = getelementptr inbounds i8, ptr %1, i64 %conv15, !dbg !10 |
| 249 | + %rem = and i64 %redOpArg, 1, !dbg !10 |
| 250 | + %cmp.not = icmp eq i64 %rem, 0, !dbg !10 |
| 251 | + %rem21 = and i64 %redOpArg, 2, !dbg !10 |
| 252 | + %cmp22.not = icmp eq i64 %rem21, 0, !dbg !10 |
| 253 | + %rem26 = and i64 %redOpArg, 4, !dbg !10 |
| 254 | + %cmp27.not = icmp eq i64 %rem26, 0, !dbg !10 |
| 255 | + %11 = inttoptr i64 %redOpArg to ptr, !dbg !10 |
| 256 | + %12 = load i64, ptr %11, align 8, !dbg !10 |
| 257 | + %conv17 = fptosi double %10 to i64, !dbg !10 |
| 258 | + %sub = sub nsw i64 %conv17, %conv15, !dbg !10 |
| 259 | + %rem.i.i5354 = and i32 %3, 63, !dbg !10 |
| 260 | + %cmp.i.i.not = icmp eq i32 %rem.i.i5354, 0, !dbg !10 |
; Test whether either offset address (src + offset, dst + offset) has any of
; its low four bits set, and feed the resulting 0/1 into the inline asm.
| 261 | + %13 = add i64 %2, %conv15, !dbg !10 |
| 262 | + %14 = ptrtoint ptr %add.ptr18 to i64, !dbg !10 |
| 263 | + %15 = or i64 %13, %14, !dbg !10 |
| 264 | + %16 = and i64 %15, 15, !dbg !10 |
| 265 | + %and1583.i.i = icmp ne i64 %16, 0, !dbg !10 |
| 266 | + %17 = zext i1 %and1583.i.i to i32, !dbg !10 |
; Empty side-effecting inline asm whose output is tied to its input ("=v,0");
; it appears in the checks as the ;;#ASMSTART / ;;#ASMEND pair.
; NOTE(review): attribute group #9 has no definition in the visible part of
; this file -- confirm whether the reference is intentional.
| 267 | + %18 = tail call i32 asm sideeffect "", "=v,0"(i32 %17) #9, !dbg !10 |
| 268 | + %19 = icmp ne i32 %18, 0, !dbg !10 |
| 269 | + %20 = tail call i64 @llvm.amdgcn.ballot.i64(i1 %19), !dbg !10 |
| 270 | + %.not.i.i = icmp eq i64 %20, 0, !dbg !10 |
| 271 | + %div1.i.i.i555659 = lshr i32 %3, 6, !dbg !10 |
| 272 | + %div8.i.i.i = sdiv i64 %sub, 4096, !dbg !10 |
| 273 | + %mul9.i.i.i = shl nsw i64 %div8.i.i.i, 12, !dbg !10 |
| 274 | + %sub12.i.i.i = sub nsw i64 %sub, %mul9.i.i.i, !dbg !10 |
| 275 | + %conv13.i.i.i = zext nneg i32 %div1.i.i.i555659 to i64, !dbg !10 |
| 276 | + %sub14.i.i.i = sub nsw i64 %div8.i.i.i, %conv13.i.i.i, !dbg !10 |
| 277 | + %cmp30399.i.i.i = icmp sgt i64 %sub14.i.i.i, 0, !dbg !10 |
; The function contains no store instruction and returns void.
| 278 | + ret void |
| 279 | +} |
| 280 | + |
; Attribute group #0, applied to the kernel: a list of "amdgpu-no-*" string
; attributes plus "uniform-work-group-size"="false".
; NOTE(review): the list includes "amdgpu-no-workgroup-id-x" and
; "amdgpu-no-workitem-id-x" although the kernel body calls
; @llvm.amdgcn.workgroup.id.x and @llvm.amdgcn.workitem.id.x -- confirm this
; combination is intended for the test.
| 281 | +attributes #0 = { "amdgpu-no-agpr" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" } |
| 282 | + |
; Debug-info metadata. !4 is the DISubprogram attached to the kernel through
; !dbg !4; it is named "test", which is why the DEBUG_VALUE checks read
; test:var. !8 is the argument variable "var" (pointer type !9) bound by the
; #dbg_value, and !10 is the single location used by the !dbg attachments.
; NOTE(review): !7 is not referenced by any visible line -- confirm whether it
; can be dropped.
| 283 | +!llvm.dbg.cu = !{!0} |
| 284 | +!llvm.module.flags = !{!2, !3} |
| 285 | +!0 = distinct !DICompileUnit(language: DW_LANG_OpenCL, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) |
| 286 | +!1 = !DIFile(filename: "<stdin>", directory: "/") |
| 287 | +!2 = !{i32 7, !"Dwarf Version", i32 5} |
| 288 | +!3 = !{i32 2, !"Debug Info Version", i32 3} |
| 289 | +!4 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !5, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) |
| 290 | +!5 = !DISubroutineType(cc: DW_CC_LLVM_OpenCLKernel, types: !6) |
| 291 | +!6 = !{null} |
| 292 | +!7 = !{i32 1024, i32 1, i32 1} |
| 293 | +!8 = !DILocalVariable(name: "var", arg: 1, scope: !4, file: !1, line: 1, type: !9) |
| 294 | +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) |
| 295 | +!10 = !DILocation(line: 1, scope: !4) |
0 commit comments