|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 |
1 | 2 | ; RUN: opt -passes=pre-isel-intrinsic-lowering -S < %s | FileCheck %s
|
2 | 3 | target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
|
3 | 4 | target triple = "aarch64"
|
4 | 5 |
|
5 | 6 | define <vscale x 4 x float> @scalable_vec_exp(<vscale x 4 x float> %input) {
|
6 | 7 | ; CHECK-LABEL: define <vscale x 4 x float> @scalable_vec_exp(
|
7 |
| -; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64() |
8 |
| -; CHECK-NEXT: [[LOOPEND:%.*]] = mul i64 [[VSCALE]], 4 |
9 |
| -; CHECK-NEXT: br label %[[LOOPBODY:.*]] |
10 |
| -; CHECK: [[LOOPBODY]]: |
11 |
| -; CHECK-NEXT: [[IDX:%.*]] = phi i64 [ 0, %0 ], [ [[NEW_IDX:%.*]], %[[LOOPBODY]] ] |
12 |
| -; CHECK-NEXT: [[VEC:%.*]] = phi <vscale x 4 x float> [ %input, %0 ], [ [[NEW_VEC:.*]], %[[LOOPBODY]] ] |
13 |
| -; CHECK-NEXT: [[ELEM:%.*]] = extractelement <vscale x 4 x float> [[VEC]], i64 [[IDX]] |
14 |
| -; CHECK-NEXT: [[RES:%.*]] = call float @llvm.exp.f32(float [[ELEM]]) |
15 |
| -; CHECK-NEXT: [[NEW_VEC:%.*]] = insertelement <vscale x 4 x float> [[VEC]], float [[RES]], i64 [[IDX]] |
16 |
| -; CHECK-NEXT: [[NEW_IDX]] = add i64 [[IDX]], 1 |
17 |
| -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[NEW_IDX]], [[LOOPEND]] |
18 |
| -; CHECK-NEXT: br i1 [[CMP]], label %[[LOOPEXIT:.*]], label %[[LOOPBODY]] |
19 |
| -; CHECK: [[LOOPEXIT]]: |
| 8 | +; CHECK-SAME: <vscale x 4 x float> [[INPUT:%.*]]) { |
| 9 | +; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() |
| 10 | +; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 |
| 11 | +; CHECK-NEXT: br label %[[BB3:.*]] |
| 12 | +; CHECK: [[BB3]]: |
| 13 | +; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP9:%.*]], %[[BB3]] ] |
| 14 | +; CHECK-NEXT: [[TMP5:%.*]] = phi <vscale x 4 x float> [ [[INPUT]], [[TMP0]] ], [ [[NEW_VEC:%.*]], %[[BB3]] ] |
| 15 | +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <vscale x 4 x float> [[TMP5]], i64 [[TMP4]] |
| 16 | +; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.exp.f32(float [[TMP6]]) |
| 17 | +; CHECK-NEXT: [[NEW_VEC]] = insertelement <vscale x 4 x float> [[TMP5]], float [[TMP7]], i64 [[TMP4]] |
| 18 | +; CHECK-NEXT: [[TMP9]] = add i64 [[TMP4]], 1 |
| 19 | +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], [[TMP2]] |
| 20 | +; CHECK-NEXT: br i1 [[TMP10]], label %[[BB11:.*]], label %[[BB3]] |
| 21 | +; CHECK: [[BB11]]: |
20 | 22 | ; CHECK-NEXT: ret <vscale x 4 x float> [[NEW_VEC]]
|
| 23 | +; |
21 | 24 | %output = call <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float> %input)
|
22 | 25 | ret <vscale x 4 x float> %output
|
23 | 26 | }
|
24 | 27 |
|
25 |
| -; CHECK: declare i64 @llvm.vscale.i64() #1 |
26 |
| -; CHECK: declare float @llvm.exp.f32(float) #0 |
| 28 | +define <4 x float> @fixed_vec_exp(<4 x float> %input) { |
| 29 | +; CHECK-LABEL: define <4 x float> @fixed_vec_exp( |
| 30 | +; CHECK-SAME: <4 x float> [[INPUT:%.*]]) { |
| 31 | +; CHECK-NEXT: br label %[[BB1:.*]] |
| 32 | +; CHECK: [[BB1]]: |
| 33 | +; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP7:%.*]], %[[BB1]] ] |
| 34 | +; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x float> [ [[INPUT]], [[TMP0]] ], [ [[TMP6:%.*]], %[[BB1]] ] |
| 35 | +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i64 [[TMP2]] |
| 36 | +; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.exp.f32(float [[TMP4]]) |
| 37 | +; CHECK-NEXT: [[TMP6]] = insertelement <4 x float> [[TMP3]], float [[TMP5]], i64 [[TMP2]] |
| 38 | +; CHECK-NEXT: [[TMP7]] = add i64 [[TMP2]], 1 |
| 39 | +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 4 |
| 40 | +; CHECK-NEXT: br i1 [[TMP8]], label %[[BB9:.*]], label %[[BB1]] |
| 41 | +; CHECK: [[BB9]]: |
| 42 | +; CHECK-NEXT: ret <4 x float> [[TMP6]] |
| 43 | +; |
| 44 | + %output = call <4 x float> @llvm.exp.v4f32(<4 x float> %input) |
| 45 | + ret <4 x float> %output |
| 46 | +} |
| 47 | + |
| 48 | +declare <4 x float> @llvm.exp.v4f32(<4 x float>) #0 |
27 | 49 | declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>) #0
|
28 | 50 |
|
29 | 51 | ; CHECK: attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
0 commit comments