-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Analysis] isTriviallyVectorizable - add vectorization support for acos/asin/atan and cosh/sinh/tanh intrinsics #106584
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Simon Pilgrim (RKSimon) Changes: Show fallback cases in amdlibm tests where it doesn't have that specific op. Full diff: https://github.com/llvm/llvm-project/pull/106584.diff 2 Files Affected:
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index cc742ab35f4498..32ce34114b2f50 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -66,9 +66,15 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
case Intrinsic::sqrt: // Begin floating-point.
+ case Intrinsic::asin:
+ case Intrinsic::acos:
+ case Intrinsic::atan:
case Intrinsic::sin:
case Intrinsic::cos:
case Intrinsic::tan:
+ case Intrinsic::sinh:
+ case Intrinsic::cosh:
+ case Intrinsic::tanh:
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::log:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index 04289d43f40e2f..c051e2f18380bd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -414,6 +414,7 @@ for.end:
define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -487,7 +488,10 @@ for.end:
define void @asin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -510,6 +514,7 @@ for.end:
define void @asin_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -588,6 +593,7 @@ define void @atan_f64_intrinsic(ptr nocapture %varray) {
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -610,6 +616,7 @@ for.end:
define void @atan_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -683,6 +690,9 @@ for.end:
define void @cosh_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -705,8 +715,10 @@ for.end:
define void @cosh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -754,6 +766,7 @@ for.end:
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
|
@RKSimon I didn't add |
Could you add the tests for other targets? |
This no longer appears to be an issue. |
…nh/tanh intrinsics to support #106584
8d660e1
to
519c030
Compare
Could you add the tests for AArch64 to RISCV too? |
What attributes / veclibs should I use for riscv? llvm-project\llvm\test\Transforms\SLPVectorizer\RISCV\math-function.ll doesn't seem to do much |
Without veclibs, just some basic stuff |
That's what math-function.ll already does |
But not newly added functions |
…intrinsics to support #106584
…vectorize acos/asin/atan and cosh/sinh/tanh intrinsics
…os/asin/atan and cosh/sinh/tanh intrinsics
519c030
to
86e6eb7
Compare
ceb613a adds missing riscv coverage - checks that there is no vectorisation |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, thanks!
…os/asin/atan and cosh/sinh/tanh libcalls Followup to llvm#106584 - ensure acos/asin/atan and cosh/sinh/tanh libcalls correctly map to the llvm intrinsic equivalents
…os/asin/atan and cosh/sinh/tanh libcalls Followup to llvm#106584 - ensure acos/asin/atan and cosh/sinh/tanh libcalls correctly map to the llvm intrinsic equivalents
Show fallback cases in amdlibm tests where it doesn't have that specific op