// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s
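// Check that __builtin_elementwise_max and __builtin_elementwise_min lower to
// the llvm.maxnum/llvm.minnum intrinsics for floating-point operands and to
// llvm.smax/llvm.smin (signed) or llvm.umax/llvm.umin (unsigned) for integer
// operands, for both scalar and vector (ext_vector_type) arguments.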

typedef float float4 __attribute__((ext_vector_type(4)));
typedef short int si8 __attribute__((ext_vector_type(8)));
typedef unsigned int u4 __attribute__((ext_vector_type(4)));

__attribute__((address_space(1))) int int_as_one;
typedef int bar;
bar b;

void test_builtin_elementwise_max(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_max(

  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.maxnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_max(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_max(d1, d2);

  // CHECK: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.maxnum.f64(double 2.000000e+01, double [[D2]])
  d1 = __builtin_elementwise_max(20.0, d2);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_max(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_max(i1, i2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smax.i64(i64 [[I1]], i64 10)
  i1 = __builtin_elementwise_max(i1, 10);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smax.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_max(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umax.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_max(u1, u2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_max(vu1, vu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_max(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_max(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smax.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_max(int_as_one, b);

  // CHECK: call i32 @llvm.smax.i32(i32 1, i32 97)
  i1 = __builtin_elementwise_max(1, 'a');
}

void test_builtin_elementwise_min(float f1, float f2, double d1, double d2,
                                  float4 vf1, float4 vf2, long long int i1,
                                  long long int i2, si8 vi1, si8 vi2,
                                  unsigned u1, unsigned u2, u4 vu1, u4 vu2) {
  // CHECK-LABEL: define void @test_builtin_elementwise_min(
  // CHECK: [[F1:%.+]] = load float, float* %f1.addr, align 4
  // CHECK-NEXT: [[F2:%.+]] = load float, float* %f2.addr, align 4
  // CHECK-NEXT: call float @llvm.minnum.f32(float [[F1]], float [[F2]])
  f1 = __builtin_elementwise_min(f1, f2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: [[D2:%.+]] = load double, double* %d2.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double [[D2]])
  d1 = __builtin_elementwise_min(d1, d2);

  // CHECK: [[D1:%.+]] = load double, double* %d1.addr, align 8
  // CHECK-NEXT: call double @llvm.minnum.f64(double [[D1]], double 2.000000e+00)
  d1 = __builtin_elementwise_min(d1, 2.0);

  // CHECK: [[VF1:%.+]] = load <4 x float>, <4 x float>* %vf1.addr, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF1]], <4 x float> [[VF2]])
  vf1 = __builtin_elementwise_min(vf1, vf2);

  // CHECK: [[I1:%.+]] = load i64, i64* %i1.addr, align 8
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[I1]], i64 [[I2]])
  i1 = __builtin_elementwise_min(i1, i2);

  // CHECK: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 -11, i64 [[I2]])
  i1 = __builtin_elementwise_min(-11, i2);

  // CHECK: [[VI1:%.+]] = load <8 x i16>, <8 x i16>* %vi1.addr, align 16
  // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, <8 x i16>* %vi2.addr, align 16
  // CHECK-NEXT: call <8 x i16> @llvm.smin.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]])
  vi1 = __builtin_elementwise_min(vi1, vi2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[U2:%.+]] = load i32, i32* %u2.addr, align 4
  // CHECK-NEXT: call i32 @llvm.umin.i32(i32 [[U1]], i32 [[U2]])
  u1 = __builtin_elementwise_min(u1, u2);

  // CHECK: [[U1:%.+]] = load i32, i32* %u1.addr, align 4
  // CHECK-NEXT: [[ZEXT_U1:%.+]] = zext i32 [[U1]] to i64
  // CHECK-NEXT: [[I2:%.+]] = load i64, i64* %i2.addr, align 8
  // CHECK-NEXT: call i64 @llvm.smin.i64(i64 [[ZEXT_U1]], i64 [[I2]])
  u1 = __builtin_elementwise_min(u1, i2);

  // CHECK: [[VU1:%.+]] = load <4 x i32>, <4 x i32>* %vu1.addr, align 16
  // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, <4 x i32>* %vu2.addr, align 16
  // CHECK-NEXT: call <4 x i32> @llvm.umin.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]])
  vu1 = __builtin_elementwise_min(vu1, vu2);

  // CHECK: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[CVF1]], <4 x float> [[VF2]])
  const float4 cvf1 = vf1;
  vf1 = __builtin_elementwise_min(cvf1, vf2);

  // CHECK: [[VF2:%.+]] = load <4 x float>, <4 x float>* %vf2.addr, align 16
  // CHECK-NEXT: [[CVF1:%.+]] = load <4 x float>, <4 x float>* %cvf1, align 16
  // CHECK-NEXT: call <4 x float> @llvm.minnum.v4f32(<4 x float> [[VF2]], <4 x float> [[CVF1]])
  vf1 = __builtin_elementwise_min(vf2, cvf1);

  // CHECK: [[IAS1:%.+]] = load i32, i32 addrspace(1)* @int_as_one, align 4
  // CHECK-NEXT: [[B:%.+]] = load i32, i32* @b, align 4
  // CHECK-NEXT: call i32 @llvm.smin.i32(i32 [[IAS1]], i32 [[B]])
  int_as_one = __builtin_elementwise_min(int_as_one, b);
}