|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py |
| 2 | +; RUN: opt -passes='default<O3>' -S < %s | FileCheck %s |
| 3 | + |
| 4 | +target triple = "arm64-apple-darwin" |
| 5 | + |
| 6 | +; Make sure we can vectorize a loop that uses a function to clamp a double to |
| 7 | +; be between a given minimum and maximum value. |
| 8 | + |
| 9 | +define internal double @clamp(double %v) { ; Clamp %v to [0.0, 6.0]: returns 0.0 if %v < 0.0, 6.0 if %v > 6.0, otherwise %v. Both compares are ordered, so a NaN input fails both and is returned unchanged. |
| 10 | +entry: ; Copy %v into a stack slot, then test the lower bound. |
| 11 | + %retval = alloca double, align 8 ; stack slot that every path stores the result into |
| 12 | + %v.addr = alloca double, align 8 ; stack copy of the argument |
| 13 | + store double %v, double* %v.addr, align 8 |
| 14 | + %0 = load double, double* %v.addr, align 8 |
| 15 | + %cmp = fcmp olt double %0, 0.000000e+00 ; %v < 0.0 ? |
| 16 | + br i1 %cmp, label %if.then, label %if.end |
| 17 | + |
| 18 | +if.then: ; preds = %entry |
| 19 | + store double 0.000000e+00, double* %retval, align 8 ; below the lower bound: result is 0.0 |
| 20 | + br label %return |
| 21 | + |
| 22 | +if.end: ; preds = %entry |
| 23 | + %1 = load double, double* %v.addr, align 8 |
| 24 | + %cmp1 = fcmp ogt double %1, 6.000000e+00 ; %v > 6.0 ? |
| 25 | + br i1 %cmp1, label %if.then2, label %if.end3 |
| 26 | + |
| 27 | +if.then2: ; preds = %if.end |
| 28 | + store double 6.000000e+00, double* %retval, align 8 ; above the upper bound: result is 6.0 |
| 29 | + br label %return |
| 30 | + |
| 31 | +if.end3: ; preds = %if.end |
| 32 | + %2 = load double, double* %v.addr, align 8 |
| 33 | + store double %2, double* %retval, align 8 ; neither compare was true: pass %v through |
| 34 | + br label %return |
| 35 | + |
| 36 | +return: ; preds = %if.end3, %if.then2, %if.then |
| 37 | + %3 = load double, double* %retval, align 8 ; single exit: reload whichever value was stored above |
| 38 | + ret double %3 |
| 39 | +} |
| 40 | + |
| 41 | +define void @loop(double* %X, double* %Y) { ; For i in [0, 20000): X[i] = clamp(Y[i]). The input below keeps all state in allocas; the expected output that follows is autogenerated and must not be edited by hand. |
| 42 | +; CHECK-LABEL: @loop( |
| 43 | +; CHECK-NEXT: entry: |
| 44 | +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr double, double* [[X:%.*]], i64 20000 |
| 45 | +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr double, double* [[Y:%.*]], i64 20000 |
| 46 | +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ugt double* [[SCEVGEP9]], [[X]] |
| 47 | +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ugt double* [[SCEVGEP]], [[Y]] |
| 48 | +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] |
| 49 | +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY:%.*]], label [[VECTOR_BODY:%.*]] |
| 50 | +; CHECK: vector.body: |
| 51 | +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[ENTRY:%.*]] ] |
| 52 | +; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[INDEX]] to i64 |
| 53 | +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[TMP0]] |
| 54 | +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>* |
| 55 | +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !alias.scope !0 |
| 56 | +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2 |
| 57 | +; CHECK-NEXT: [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>* |
| 58 | +; CHECK-NEXT: [[WIDE_LOAD11:%.*]] = load <2 x double>, <2 x double>* [[TMP4]], align 8, !alias.scope !0 |
| 59 | +; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD]], zeroinitializer |
| 60 | +; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <2 x double> [[WIDE_LOAD11]], zeroinitializer |
| 61 | +; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD]], <double 6.000000e+00, double 6.000000e+00> |
| 62 | +; CHECK-NEXT: [[TMP8:%.*]] = fcmp ogt <2 x double> [[WIDE_LOAD11]], <double 6.000000e+00, double 6.000000e+00> |
| 63 | +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD]] |
| 64 | +; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP8]], <2 x double> <double 6.000000e+00, double 6.000000e+00>, <2 x double> [[WIDE_LOAD11]] |
| 65 | +; CHECK-NEXT: [[TMP11:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP9]] |
| 66 | +; CHECK-NEXT: [[TMP12:%.*]] = select <2 x i1> [[TMP6]], <2 x double> zeroinitializer, <2 x double> [[TMP10]] |
| 67 | +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[TMP0]] |
| 68 | +; CHECK-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to <2 x double>* |
| 69 | +; CHECK-NEXT: store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8, !alias.scope !3, !noalias !0 |
| 70 | +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, double* [[TMP13]], i64 2 |
| 71 | +; CHECK-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP15]] to <2 x double>* |
| 72 | +; CHECK-NEXT: store <2 x double> [[TMP12]], <2 x double>* [[TMP16]], align 8, !alias.scope !3, !noalias !0 |
| 73 | +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 |
| 74 | +; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20000 |
| 75 | +; CHECK-NEXT: br i1 [[TMP17]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] |
| 76 | +; CHECK: for.cond.cleanup: |
| 77 | +; CHECK-NEXT: ret void |
| 78 | +; CHECK: for.body: |
| 79 | +; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] |
| 80 | +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I_05]] to i64 |
| 81 | +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[Y]], i64 [[IDXPROM]] |
| 82 | +; CHECK-NEXT: [[TMP18:%.*]] = load double, double* [[ARRAYIDX]], align 8 |
| 83 | +; CHECK-NEXT: [[CMP_I:%.*]] = fcmp olt double [[TMP18]], 0.000000e+00 |
| 84 | +; CHECK-NEXT: [[CMP1_I:%.*]] = fcmp ogt double [[TMP18]], 6.000000e+00 |
| 85 | +; CHECK-NEXT: [[DOTV_I:%.*]] = select i1 [[CMP1_I]], double 6.000000e+00, double [[TMP18]] |
| 86 | +; CHECK-NEXT: [[RETVAL_0_I:%.*]] = select i1 [[CMP_I]], double 0.000000e+00, double [[DOTV_I]] |
| 87 | +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[X]], i64 [[IDXPROM]] |
| 88 | +; CHECK-NEXT: store double [[RETVAL_0_I]], double* [[ARRAYIDX2]], align 8 |
| 89 | +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_05]], 1 |
| 90 | +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I_05]], 19999 |
| 91 | +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP7:![0-9]+]] |
| 92 | +; |
| 93 | +entry: ; Spill both pointer arguments to the stack and create the i32 counter %i, initialised to 0. |
| 94 | + %X.addr = alloca double*, align 8 |
| 95 | + %Y.addr = alloca double*, align 8 |
| 96 | + %i = alloca i32, align 4 ; loop counter, kept in memory and reloaded at every use |
| 97 | + store double* %X, double** %X.addr, align 8 |
| 98 | + store double* %Y, double** %Y.addr, align 8 |
| 99 | + %0 = bitcast i32* %i to i8* |
| 100 | + call void @llvm.lifetime.start.p0i8(i64 4, i8* %0) #2 ; NOTE(review): attribute group #2 is referenced here and at the lifetime.end call, but no definition of it is visible in this file - confirm this is intentional |
| 101 | + store i32 0, i32* %i, align 4 ; i = 0 |
| 102 | + br label %for.cond |
| 103 | + |
| 104 | +for.cond: ; preds = %for.inc, %entry |
| 105 | + %1 = load i32, i32* %i, align 4 |
| 106 | + %cmp = icmp ult i32 %1, 20000 ; keep iterating while i < 20000 (unsigned compare) |
| 107 | + br i1 %cmp, label %for.body, label %for.cond.cleanup |
| 108 | + |
| 109 | +for.cond.cleanup: ; preds = %for.cond |
| 110 | + %2 = bitcast i32* %i to i8* |
| 111 | + call void @llvm.lifetime.end.p0i8(i64 4, i8* %2) #2 ; loop finished: end the lifetime of %i |
| 112 | + br label %for.end |
| 113 | + |
| 114 | +for.body: ; preds = %for.cond |
| 115 | + %3 = load double*, double** %Y.addr, align 8 |
| 116 | + %4 = load i32, i32* %i, align 4 |
| 117 | + %idxprom = zext i32 %4 to i64 |
| 118 | + %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom |
| 119 | + %5 = load double, double* %arrayidx, align 8 ; Y[i] |
| 120 | + %call = call double @clamp(double %5) ; clamp(Y[i]) |
| 121 | + %6 = load double*, double** %X.addr, align 8 |
| 122 | + %7 = load i32, i32* %i, align 4 |
| 123 | + %idxprom1 = zext i32 %7 to i64 |
| 124 | + %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1 |
| 125 | + store double %call, double* %arrayidx2, align 8 ; X[i] = clamp(Y[i]) |
| 126 | + br label %for.inc |
| 127 | + |
| 128 | +for.inc: ; preds = %for.body |
| 129 | + %8 = load i32, i32* %i, align 4 |
| 130 | + %inc = add i32 %8, 1 ; i = i + 1 (plain add, no nuw/nsw flags in the input) |
| 131 | + store i32 %inc, i32* %i, align 4 |
| 132 | + br label %for.cond |
| 133 | + |
| 134 | +for.end: ; preds = %for.cond.cleanup |
| 135 | + ret void |
| 136 | +} |
| 137 | + |
| 138 | +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) ; called in @loop's entry block on the bitcast address of the %i alloca |
| 139 | + |
| 140 | +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) ; called in @loop's for.cond.cleanup block on the same alloca |
0 commit comments