|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: opt %s -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=skx -S -o - | FileCheck %s |
| 3 | + |
| 4 | +; This test verifies that the vectorizer can handle an extended sequence of |
| 5 | +; getelementptr instructions and generate longer vectors. With special handling, |
| 6 | +; some elements can still be vectorized even if they require looking up the |
| 7 | +; common underlying object deeper than 6 levels from the original pointer. |
| 8 | + |
| 9 | +; The test below is a simplified version of an actual performance-oriented |
| 10 | +; workload; the offsets in the getelementptr instructions are similar or |
| 11 | +; identical to keep the test simple. |
| 12 | + |
| 13 | +define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8(i32 %arg0, ptr align 16 %arg1) { |
| 14 | +; CHECK-LABEL: define void @v1_v2_v4_v1_to_v8_levels_6_7_8_8( |
| 15 | +; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] { |
| 16 | +; CHECK-NEXT: [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504 |
| 17 | +; CHECK-NEXT: [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]] |
| 18 | +; CHECK-NEXT: [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768 |
| 19 | +; CHECK-NEXT: [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]] |
| 20 | +; CHECK-NEXT: [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]] |
| 21 | +; CHECK-NEXT: [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]] |
| 22 | +; CHECK-NEXT: store <8 x half> zeroinitializer, ptr [[A6]], align 16 |
| 23 | +; CHECK-NEXT: ret void |
| 24 | +; |
| 25 | +; Levels 1-5: a chain of five i8 GEPs rooted at %arg1, using constant and %arg0 offsets. |
| 26 | + %level1 = getelementptr i8, ptr %arg1, i32 917504 |
| 27 | + %level2 = getelementptr i8, ptr %level1, i32 %arg0 |
| 28 | + %level3 = getelementptr i8, ptr %level2, i32 32768 |
| 29 | + %level4 = getelementptr i8, ptr %level3, i32 %arg0 |
| 30 | + %level5 = getelementptr i8, ptr %level4, i32 %arg0 |
| 31 | +; %a6 is the sixth GEP from %arg1; %b7 = %a6 + 2, %c8 = %a6 + 10 and %d8 = %a6 + 14 (the last two are both derived from %b7). |
| 32 | + %a6 = getelementptr i8, ptr %level5, i32 %arg0 |
| 33 | + %b7 = getelementptr i8, ptr %a6, i32 2 |
| 34 | + %c8 = getelementptr i8, ptr %b7, i32 8 |
| 35 | + %d8 = getelementptr i8, ptr %b7, i32 12 |
| 36 | +; The four stores cover bytes 0-15 from %a6 (half, <4 x half>, <2 x half>, half); the assertions above expect one <8 x half> store. NOTE(review): that order is v1, v4, v2, v1, unlike the v1_v2_v4_v1 in the function name - confirm the name. |
| 37 | + store half 0xH0000, ptr %a6, align 16 |
| 38 | + store <4 x half> zeroinitializer, ptr %b7, align 2 |
| 39 | + store <2 x half> zeroinitializer, ptr %c8, align 2 |
| 40 | + store half 0xH0000, ptr %d8, align 2 |
| 41 | + ret void |
| 42 | +} |
| 43 | + |
| 44 | +define void @v1x8_levels_6_7_8_9_10_11_12_13(i32 %arg0, ptr align 16 %arg1) { |
| 45 | +; CHECK-LABEL: define void @v1x8_levels_6_7_8_9_10_11_12_13( |
| 46 | +; CHECK-SAME: i32 [[ARG0:%.*]], ptr align 16 [[ARG1:%.*]]) #[[ATTR0]] { |
| 47 | +; CHECK-NEXT: [[LEVEL1:%.*]] = getelementptr i8, ptr [[ARG1]], i32 917504 |
| 48 | +; CHECK-NEXT: [[LEVEL2:%.*]] = getelementptr i8, ptr [[LEVEL1]], i32 [[ARG0]] |
| 49 | +; CHECK-NEXT: [[LEVEL3:%.*]] = getelementptr i8, ptr [[LEVEL2]], i32 32768 |
| 50 | +; CHECK-NEXT: [[LEVEL4:%.*]] = getelementptr i8, ptr [[LEVEL3]], i32 [[ARG0]] |
| 51 | +; CHECK-NEXT: [[LEVEL5:%.*]] = getelementptr i8, ptr [[LEVEL4]], i32 [[ARG0]] |
| 52 | +; CHECK-NEXT: [[A6:%.*]] = getelementptr i8, ptr [[LEVEL5]], i32 [[ARG0]] |
| 53 | +; CHECK-NEXT: store <8 x half> zeroinitializer, ptr [[A6]], align 16 |
| 54 | +; CHECK-NEXT: ret void |
| 55 | +; |
| 56 | +; Levels 1-5: a chain of five i8 GEPs rooted at %arg1, using constant and %arg0 offsets. |
| 57 | + %level1 = getelementptr i8, ptr %arg1, i32 917504 |
| 58 | + %level2 = getelementptr i8, ptr %level1, i32 %arg0 |
| 59 | + %level3 = getelementptr i8, ptr %level2, i32 32768 |
| 60 | + %level4 = getelementptr i8, ptr %level3, i32 %arg0 |
| 61 | + %level5 = getelementptr i8, ptr %level4, i32 %arg0 |
| 62 | +; %a6 is the sixth GEP from %arg1; each of %b7 through %h13 adds 2 bytes to the previous pointer, giving GEP depths 6 through 13 and byte offsets 0 through 14 from %a6. |
| 63 | + %a6 = getelementptr i8, ptr %level5, i32 %arg0 |
| 64 | + %b7 = getelementptr i8, ptr %a6, i32 2 |
| 65 | + %c8 = getelementptr i8, ptr %b7, i32 2 |
| 66 | + %d9 = getelementptr i8, ptr %c8, i32 2 |
| 67 | + %e10 = getelementptr i8, ptr %d9, i32 2 |
| 68 | + %f11 = getelementptr i8, ptr %e10, i32 2 |
| 69 | + %g12 = getelementptr i8, ptr %f11, i32 2 |
| 70 | + %h13 = getelementptr i8, ptr %g12, i32 2 |
| 71 | +; Eight scalar half stores to consecutive 2-byte slots; the assertions above expect one <8 x half> store with align 16. The %e10 store states align 8, matching its 8-byte offset from %a6, whose store states align 16. |
| 72 | + store half 0xH0000, ptr %a6, align 16 |
| 73 | + store half 0xH0000, ptr %b7, align 2 |
| 74 | + store half 0xH0000, ptr %c8, align 2 |
| 75 | + store half 0xH0000, ptr %d9, align 2 |
| 76 | + store half 0xH0000, ptr %e10, align 8 |
| 77 | + store half 0xH0000, ptr %f11, align 2 |
| 78 | + store half 0xH0000, ptr %g12, align 2 |
| 79 | + store half 0xH0000, ptr %h13, align 2 |
| 80 | + ret void |
| 81 | +} |
| 82 | + |
| 83 | +define void @v1_4_4_4_2_1_to_v8_8_levels_6_7(i32 %arg0, ptr addrspace(3) align 16 %arg1_ptr, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, half %arg6_half, half %arg7_half, <2 x half> %arg8_2xhalf) { |
| 84 | +; CHECK-LABEL: define void @v1_4_4_4_2_1_to_v8_8_levels_6_7( |
| 85 | +; CHECK-SAME: i32 [[ARG0:%.*]], ptr addrspace(3) align 16 [[ARG1_PTR:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]], i32 [[ARG4:%.*]], i32 [[ARG5:%.*]], half [[ARG6_HALF:%.*]], half [[ARG7_HALF:%.*]], <2 x half> [[ARG8_2XHALF:%.*]]) #[[ATTR0]] { |
| 86 | +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[ARG1_PTR]], i32 458752 |
| 87 | +; CHECK-NEXT: br [[DOTPREHEADER11_PREHEADER:label %.*]] |
| 88 | +; CHECK: [[_PREHEADER11_PREHEADER:.*:]] |
| 89 | +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i32 [[ARG0]], 6 |
| 90 | +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP1]], i32 [[TMP2]] |
| 91 | +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP3]], i32 [[ARG2]] |
| 92 | +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP4]], i32 [[ARG3]] |
| 93 | +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[ARG0]], 2 |
| 94 | +; CHECK-NEXT: br i1 [[CMP]], [[DOTLR_PH:label %.*]], [[DOTEXIT_POINT:label %.*]] |
| 95 | +; CHECK: [[_LR_PH:.*:]] |
| 96 | +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP5]], i32 [[ARG4]] |
| 97 | +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[GEP]], i32 [[ARG5]] |
| 98 | +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x half> poison, half [[ARG6_HALF]], i32 0 |
| 99 | +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x half> [[TMP7]], half 0xH0000, i32 1 |
| 100 | +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <8 x half> [[TMP8]], half 0xH0000, i32 2 |
| 101 | +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <8 x half> [[TMP9]], half 0xH0000, i32 3 |
| 102 | +; CHECK-NEXT: [[TMP11:%.*]] = insertelement <8 x half> [[TMP10]], half 0xH0000, i32 4 |
| 103 | +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 0 |
| 104 | +; CHECK-NEXT: [[TMP13:%.*]] = insertelement <8 x half> [[TMP11]], half [[TMP12]], i32 5 |
| 105 | +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x half> [[ARG8_2XHALF]], i32 1 |
| 106 | +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <8 x half> [[TMP13]], half [[TMP14]], i32 6 |
| 107 | +; CHECK-NEXT: [[TMP16:%.*]] = insertelement <8 x half> [[TMP15]], half [[ARG7_HALF]], i32 7 |
| 108 | +; CHECK-NEXT: store <8 x half> [[TMP16]], ptr addrspace(3) [[TMP6]], align 2 |
| 109 | +; CHECK-NEXT: br [[DOTEXIT_POINT]] |
| 110 | +; CHECK: [[_EXIT_POINT:.*:]] |
| 111 | +; CHECK-NEXT: ret void |
| 112 | +; |
| 113 | + %base1 = getelementptr inbounds i8, ptr addrspace(3) %arg1_ptr, i32 458752 |
| 114 | + br label %.preheader11.preheader |
| 115 | +; The GEP chain spans basic blocks: %base1, %base3, %base4 and %base5 are GEP levels 1-4 (%base2 is only the shl that produces the index for %base3). |
| 116 | +.preheader11.preheader: |
| 117 | + %base2 = shl nuw nsw i32 %arg0, 6 |
| 118 | + %base3 = getelementptr inbounds i8, ptr addrspace(3) %base1, i32 %base2 |
| 119 | + |
| 120 | + %base4 = getelementptr inbounds i8, ptr addrspace(3) %base3, i32 %arg2 |
| 121 | + %base5 = getelementptr inbounds i8, ptr addrspace(3) %base4, i32 %arg3 |
| 122 | +; Only the .lr.ph block stores; it is entered when %arg0 is greater than 2 (signed compare). |
| 123 | + %cmp = icmp sgt i32 %arg0, 2 |
| 124 | + br i1 %cmp, label %.lr.ph, label %.exit_point |
| 125 | + |
| 126 | +.lr.ph: |
| 127 | + %gep = getelementptr inbounds i8, ptr addrspace(3) %base5, i32 %arg4 |
| 128 | +; %gep is GEP level 5 and %dst level 6; the three offset pointers (2, 10 and 14 bytes from %dst) are level 7. |
| 129 | + %dst = getelementptr inbounds i8, ptr addrspace(3) %gep, i32 %arg5 |
| 130 | + %dst_off2 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 2 |
| 131 | + %dst_off10 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 10 |
| 132 | + %dst_off14 = getelementptr inbounds i8, ptr addrspace(3) %dst, i32 14 |
| 133 | +; half, <4 x half>, <2 x half>, half at offsets 0, 2, 10, 14 cover 16 contiguous bytes; the assertions above expect a single <8 x half> store (align 2) assembled with insertelement and extractelement. NOTE(review): the name's v1_4_4_4_2_1_to_v8_8 suggests more stores than the four present, presumably a leftover from test reduction - confirm. |
| 134 | + store half %arg6_half, ptr addrspace(3) %dst, align 2 |
| 135 | + store <4 x half> zeroinitializer, ptr addrspace(3) %dst_off2, align 2 |
| 136 | + store <2 x half> %arg8_2xhalf, ptr addrspace(3) %dst_off10, align 2 |
| 137 | + store half %arg7_half, ptr addrspace(3) %dst_off14, align 2 |
| 138 | + br label %.exit_point |
| 139 | + |
| 140 | +.exit_point: |
| 141 | + ret void |
| 142 | +} |
| 143 | + |
| 144 | +; A regression test for merging equivalence classes. It is reduced and adapted |
| 145 | +; for LSV from llvm/test/CodeGen/NVPTX/variadics-backend.ll, which failed in |
| 146 | +; post-commit checks with the memory sanitizer during the initial attempt to |
| 147 | +; implement the merging of equivalence classes. |
| 148 | +define void @variadics1(ptr %vlist) { |
| 149 | +; CHECK-LABEL: define void @variadics1( |
| 150 | +; CHECK-SAME: ptr [[VLIST:%.*]]) #[[ATTR0]] { |
| 151 | +; CHECK-NEXT: [[ARGP_CUR7_ALIGNED2:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[VLIST]], i64 0) |
| 152 | +; CHECK-NEXT: [[ARGP_NEXT8:%.*]] = getelementptr i8, ptr [[ARGP_CUR7_ALIGNED2]], i64 8 |
| 153 | +; CHECK-NEXT: [[X0:%.*]] = getelementptr i8, ptr [[ARGP_NEXT8]], i32 7 |
| 154 | +; CHECK-NEXT: [[ARGP_CUR11_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X0]], i64 0) |
| 155 | +; CHECK-NEXT: [[ARGP_NEXT12:%.*]] = getelementptr i8, ptr [[ARGP_CUR11_ALIGNED]], i64 8 |
| 156 | +; CHECK-NEXT: [[X2:%.*]] = getelementptr i8, ptr [[ARGP_NEXT12]], i32 7 |
| 157 | +; CHECK-NEXT: [[ARGP_CUR16_ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[X2]], i64 0) |
| 158 | +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARGP_CUR16_ALIGNED]], align 4294967296 |
| 159 | +; CHECK-NEXT: [[X31:%.*]] = extractelement <2 x double> [[TMP1]], i32 0 |
| 160 | +; CHECK-NEXT: [[X42:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 |
| 161 | +; CHECK-NEXT: [[X5:%.*]] = fadd double [[X42]], [[X31]] |
| 162 | +; CHECK-NEXT: store double [[X5]], ptr null, align 8 |
| 163 | +; CHECK-NEXT: ret void |
| 164 | +; |
| 165 | + %argp.cur7.aligned2 = call ptr @llvm.ptrmask.p0.i64(ptr %vlist, i64 0) ; start of a ptrmask / GEP +8 / GEP +7 chain; every ptrmask uses a constant mask of 0 |
| 166 | + %argp.next8 = getelementptr i8, ptr %argp.cur7.aligned2, i64 8 |
| 167 | + %x0 = getelementptr i8, ptr %argp.next8, i32 7 |
| 168 | + %argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x0, i64 0) |
| 169 | + %argp.next12 = getelementptr i8, ptr %argp.cur11.aligned, i64 8 |
| 170 | + %x2 = getelementptr i8, ptr %argp.next12, i32 7 |
| 171 | + %argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %x2, i64 0) ; third ptrmask; base pointer of both loads below |
| 172 | + %x3 = load double, ptr %argp.cur16.aligned, align 8 ; first double, at the base pointer |
| 173 | + %argp.cur16.aligned_off8 = getelementptr i8, ptr %argp.cur16.aligned, i32 8 ; base + 8 bytes |
| 174 | + %x4 = load double, ptr %argp.cur16.aligned_off8, align 8 ; second double, adjacent to %x3; the assertions above expect both loads merged into one <2 x double> load |
| 175 | + %x5 = fadd double %x4, %x3 |
| 176 | + store double %x5, ptr null, align 8 ; the sum is stored through a null pointer, and the expected output keeps this store |
| 177 | + ret void |
| 178 | +} |
| 179 | +; Declaration of the ptrmask intrinsic called by @variadics1. |
| 180 | +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) |
0 commit comments