[LoopVectorize][X86] amdlibm-calls.ll - add additional 2/4/8/16 vector widths test checks

RKSimon · RKSimon · commit 2f95298727bc · 2024-08-29T14:27:31.000+01:00
This should cover most amdlibm functions, but still not added every VF combo (e.g. 2f32/16f64 often vectorises to the llvm intrinsic for that vector type)
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -77,6 +77,7 @@ declare float @llvm.exp2.f32(float) #0
 
 define void @sin_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -102,6 +103,7 @@ for.end:
 define void @sin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -125,6 +127,7 @@ for.end:
 
 define void @sin_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_sin(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_sin(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_sin(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -150,6 +153,7 @@ for.end:
 define void @sin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @sin_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_sinf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_sinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_sinf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -173,6 +177,7 @@ for.end:
 
 define void @cos_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -198,6 +203,7 @@ for.end:
 define void @cos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -221,6 +227,7 @@ for.end:
 
 define void @cos_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cos(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_cos(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_cos(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -246,6 +253,7 @@ for.end:
 define void @cos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cos_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_cosf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_cosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_cosf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -295,6 +303,7 @@ for.end:
 define void @tan_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -344,6 +353,7 @@ for.end:
 define void @tan_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @tan_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -368,6 +378,7 @@ for.end:
 define void @acos_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @acos_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -392,6 +403,7 @@ for.end:
 define void @acos_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @acos_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -439,6 +451,7 @@ for.end:
 define void @asin_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -486,6 +499,7 @@ for.end:
 define void @asin_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @asin_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -535,6 +549,7 @@ for.end:
 define void @atan_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -584,6 +599,7 @@ for.end:
 define void @atan_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @atan_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -631,6 +647,7 @@ for.end:
 define void @cosh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -677,6 +694,7 @@ for.end:
 define void @cosh_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @cosh_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
 entry:
@@ -700,6 +718,7 @@ for.end:
 define void @tanh_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @tanh_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -724,6 +743,7 @@ for.end:
 define void @tanh_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @tanh_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -747,6 +767,7 @@ for.end:
 
 define void @pow_f64(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64(
+; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
@@ -773,6 +794,7 @@ for.end:
 
 define void @pow_f64_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f64_intrinsic(
+; CHECK-VF2:    [[TMP8:%.*]] = call <2 x double> @amd_vrd2_pow(<2 x double> [[TMP4:%.*]], <2 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x double> @amd_vrd4_pow(<4 x double> [[TMP4:%.*]], <4 x double> [[WIDE_LOAD:%.*]])
 ; CHECK-VF8:    [[TMP8:%.*]] = call <8 x double> @amd_vrd8_pow(<8 x double> [[TMP4:%.*]], <8 x double> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
@@ -800,6 +822,7 @@ for.end:
 define void @pow_f32(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32(
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
 ;
@@ -826,6 +849,7 @@ for.end:
 define void @pow_f32_intrinsic(ptr nocapture %varray, ptr nocapture readonly %exp) {
 ; CHECK-LABEL: @pow_f32_intrinsic(
 ; CHECK-VF4:    [[TMP8:%.*]] = call <4 x float> @amd_vrs4_powf(<4 x float> [[TMP4:%.*]], <4 x float> [[WIDE_LOAD:%.*]])
+; CHECK-VF8:    [[TMP8:%.*]] = call <8 x float> @amd_vrs8_powf(<8 x float> [[TMP4:%.*]], <8 x float> [[WIDE_LOAD:%.*]])
 ; CHECK-VF16:   [[TMP8:%.*]] = call <16 x float> @amd_vrs16_powf(<16 x float> [[TMP4:%.*]], <16 x float> [[WIDE_LOAD:%.*]])
 ; CHECK:        ret void
 ;
@@ -851,6 +875,7 @@ for.end:
 
 define void @exp_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -876,6 +901,7 @@ for.end:
 define void @exp_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -899,6 +925,7 @@ for.end:
 
 define void @exp_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -924,6 +951,7 @@ for.end:
 define void @exp_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_expf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_expf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -947,6 +975,7 @@ for.end:
 
 define void @log_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -972,6 +1001,7 @@ for.end:
 define void @log_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -995,6 +1025,7 @@ for.end:
 
 define void @log_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -1020,6 +1051,7 @@ for.end:
 define void @log_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_logf(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_logf(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_logf(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1043,6 +1075,7 @@ for.end:
 
 define void @log2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -1068,6 +1101,7 @@ for.end:
 define void @log2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1091,6 +1125,7 @@ for.end:
 
 define void @log2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_log2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_log2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_log2(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -1116,6 +1151,7 @@ for.end:
 define void @log2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log2_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log2f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log2f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1140,6 +1176,7 @@ for.end:
 define void @log10_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1164,6 +1201,7 @@ for.end:
 define void @log10_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @log10_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_log10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_log10f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_log10f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1187,6 +1225,7 @@ for.end:
 
 define void @exp2_f64(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -1212,6 +1251,7 @@ for.end:
 define void @exp2_f32(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;
@@ -1235,6 +1275,7 @@ for.end:
 
 define void @exp2_f64_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f64_intrinsic(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp2(<2 x double> [[TMP4:%.*]])
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x double> @amd_vrd4_exp2(<4 x double> [[TMP4:%.*]])
 ; CHECK-VF8:    [[TMP5:%.*]] = call <8 x double> @amd_vrd8_exp2(<8 x double> [[TMP4:%.*]])
 ; CHECK:        ret void
@@ -1260,6 +1301,7 @@ for.end:
 define void @exp2_f32_intrinsic(ptr nocapture %varray) {
 ; CHECK-LABEL: @exp2_f32_intrinsic(
 ; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp2f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp2f(<8 x float> [[TMP4:%.*]])
 ; CHECK-VF16:   [[TMP5:%.*]] = call <16 x float> @amd_vrs16_exp2f(<16 x float> [[TMP4:%.*]])
 ; CHECK:        ret void
 ;