|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 |
| 2 | +; Two-entry phi nodes with unpredictable conditions may get increased budget for folding. |
| 3 | +; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s |
| 4 | +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-FOLD %s |
| 5 | + |
| 6 | +define { <2 x float>, <2 x float> } @foo(float %speed, <2 x float> %velocity.coerce0, <2 x float> %velocity.coerce1) { |
| 7 | +; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo( |
| 8 | +; CHECK-NOFOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) { |
| 9 | +; CHECK-NOFOLD-NEXT: [[ENTRY:.*]]: |
| 10 | +; CHECK-NOFOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000 |
| 11 | +; CHECK-NOFOLD-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]], !unpredictable [[META0:![0-9]+]] |
| 12 | +; CHECK-NOFOLD: [[IF_THEN]]: |
| 13 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0 |
| 14 | +; CHECK-NOFOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]] |
| 15 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1 |
| 16 | +; CHECK-NOFOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]] |
| 17 | +; CHECK-NOFOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]] |
| 18 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0 |
| 19 | +; CHECK-NOFOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]] |
| 20 | +; CHECK-NOFOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]] |
| 21 | +; CHECK-NOFOLD-NEXT: [[TMP0:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]]) |
| 22 | +; CHECK-NOFOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]] |
| 23 | +; CHECK-NOFOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]] |
| 24 | +; CHECK-NOFOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0 |
| 25 | +; CHECK-NOFOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]] |
| 26 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1 |
| 27 | +; CHECK-NOFOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]] |
| 28 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0 |
| 29 | +; CHECK-NOFOLD-NEXT: br label %[[IF_END]] |
| 30 | +; CHECK-NOFOLD: [[IF_END]]: |
| 31 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_0_4_VEC_INSERT25]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] |
| 32 | +; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_14_8_VEC_INSERT35]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ] |
| 33 | +; CHECK-NOFOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0 |
| 34 | +; CHECK-NOFOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1 |
| 35 | +; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]] |
| 36 | +; |
| 37 | +; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo( |
| 38 | +; CHECK-FOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] { |
| 39 | +; CHECK-FOLD-NEXT: [[ENTRY:.*:]] |
| 40 | +; CHECK-FOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000 |
| 41 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0 |
| 42 | +; CHECK-FOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]] |
| 43 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1 |
| 44 | +; CHECK-FOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]] |
| 45 | +; CHECK-FOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]] |
| 46 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0 |
| 47 | +; CHECK-FOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]] |
| 48 | +; CHECK-FOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]] |
| 49 | +; CHECK-FOLD-NEXT: [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]]) |
| 50 | +; CHECK-FOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]] |
| 51 | +; CHECK-FOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]] |
| 52 | +; CHECK-FOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0 |
| 53 | +; CHECK-FOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]] |
| 54 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1 |
| 55 | +; CHECK-FOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]] |
| 56 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0 |
| 57 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_0_4_VEC_INSERT25]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]] |
| 58 | +; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_14_8_VEC_INSERT35]], <2 x float> zeroinitializer, !unpredictable [[META0]] |
| 59 | +; CHECK-FOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0 |
| 60 | +; CHECK-FOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1 |
| 61 | +; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]] |
| 62 | +; |
| 63 | +entry: |
| 64 | + %cmp = fcmp fast ogt float %speed, 0x3F747AE140000000 |
| 65 | + br i1 %cmp, label %if.then, label %if.end, !unpredictable !0 |
| 66 | + |
| 67 | +if.then: |
| 68 | + %velocity.sroa.0.0.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 0 |
| 69 | + %mul.i.i.i.i = fmul fast float %velocity.sroa.0.0.vec.extract, %velocity.sroa.0.0.vec.extract |
| 70 | + %velocity.sroa.0.4.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 1 |
| 71 | + %mul8.i.i.i.i = fmul fast float %velocity.sroa.0.4.vec.extract, %velocity.sroa.0.4.vec.extract |
| 72 | + %add.i.i.i.i = fadd fast float %mul8.i.i.i.i, %mul.i.i.i.i |
| 73 | + %velocity.sroa.14.8.vec.extract = extractelement <2 x float> %velocity.coerce1, i64 0 |
| 74 | + %mul13.i.i.i.i = fmul fast float %velocity.sroa.14.8.vec.extract, %velocity.sroa.14.8.vec.extract |
| 75 | + %add14.i.i.i.i = fadd fast float %add.i.i.i.i, %mul13.i.i.i.i |
| 76 | + %0 = tail call fast noundef float @llvm.sqrt.f32(float %add14.i.i.i.i) |
| 77 | + %mul.i.i.i = fdiv fast float 0x3FEFD70A40000000, %0 |
| 78 | + %sub.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.0.vec.extract |
| 79 | + %1 = insertelement <2 x float> poison, float %sub.i, i64 0 |
| 80 | + %sub8.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.4.vec.extract |
| 81 | + %velocity.sroa.0.4.vec.insert25 = insertelement <2 x float> %1, float %sub8.i, i64 1 |
| 82 | + %sub13.i = fmul fast float %mul.i.i.i, %velocity.sroa.14.8.vec.extract |
| 83 | + %velocity.sroa.14.8.vec.insert35 = insertelement <2 x float> %velocity.coerce1, float %sub13.i, i64 0 |
| 84 | + br label %if.end |
| 85 | + |
| 86 | +if.end: |
| 87 | + %velocity.sroa.0.0 = phi nsz <2 x float> [ %velocity.sroa.0.4.vec.insert25, %if.then ], [ zeroinitializer, %entry ] |
| 88 | + %velocity.sroa.14.0 = phi nsz <2 x float> [ %velocity.sroa.14.8.vec.insert35, %if.then ], [ zeroinitializer, %entry ] |
| 89 | + %.fca.0.insert = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %velocity.sroa.0.0, 0 |
| 90 | + %.fca.1.insert = insertvalue { <2 x float>, <2 x float> } %.fca.0.insert, <2 x float> %velocity.sroa.14.0, 1 |
| 91 | + ret { <2 x float>, <2 x float> } %.fca.1.insert |
| 92 | +} |
| 93 | + |
| 94 | +declare float @llvm.sqrt.f32(float) |
| 95 | + |
| 96 | +!0 = !{} |
0 commit comments