-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Add baseline test for cost of abs intrinsics #100522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This was referenced Jul 25, 2024
This was referenced Jul 25, 2024
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) ChangesPatch is 36.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/100522.diff 1 Files Affected:
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/abs.ll b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
new file mode 100644
index 0000000000000..133b95609bc15
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AMDGPU/abs.ll
@@ -0,0 +1,341 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST %s
+; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=FAST-SIZE %s
+; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s
+; END.
+
+declare i64 @llvm.abs.i64(i64, i1 immarg)
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1 immarg)
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1 immarg)
+declare <5 x i64> @llvm.abs.v5i64(<5 x i64>, i1 immarg)
+declare <8 x i64> @llvm.abs.v8i64(<8 x i64>, i1 immarg)
+
+declare i32 @llvm.abs.i32(i32, i1 immarg)
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1 immarg)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1 immarg)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1 immarg)
+declare <9 x i32> @llvm.abs.v9i32(<9 x i32>, i1 immarg)
+declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg)
+
+declare i16 @llvm.abs.i16(i16, i1 immarg)
+declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1 immarg)
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1 immarg)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1 immarg)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)
+declare <17 x i16> @llvm.abs.v17i16(<17 x i16>, i1 immarg)
+declare <32 x i16> @llvm.abs.v32i16(<32 x i16>, i1 immarg)
+
+declare i8 @llvm.abs.i8(i8, i1 immarg)
+declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1 immarg)
+declare <4 x i8> @llvm.abs.v4i8(<4 x i8>, i1 immarg)
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1 immarg)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1 immarg)
+declare <32 x i8> @llvm.abs.v32i8(<32 x i8>, i1 immarg)
+declare <33 x i8> @llvm.abs.v33i8(<33 x i8>, i1 immarg)
+declare <64 x i8> @llvm.abs.v64i8(<64 x i8>, i1 immarg)
+
+define i32 @abs_nonpoison(i32 %arg) {
+; FAST-LABEL: 'abs_nonpoison'
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 114 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V33I8 = call <33 x i8> @llvm.abs.v33i8(<33 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; FAST-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; SLOW-LABEL: 'abs_nonpoison'
+; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V33I8 = call <33 x i8> @llvm.abs.v33i8(<33 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 322 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
+;
+; FAST-SIZE-LABEL: 'abs_nonpoison'
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V33I8 = call <33 x i8> @llvm.abs.v33i8(<33 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; FAST-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+; SLOW-SIZE-LABEL: 'abs_nonpoison'
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V33I8 = call <33 x i8> @llvm.abs.v33i8(<33 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
+;
+ %I64 = call i64 @llvm.abs.i64(i64 undef, i1 false)
+ %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 false)
+ %V4I64 = call <4 x i64> @llvm.abs.v4i64(<4 x i64> undef, i1 false)
+ %V5I64 = call <5 x i64> @llvm.abs.v5i64(<5 x i64> undef, i1 false)
+ %V8I64 = call <8 x i64> @llvm.abs.v8i64(<8 x i64> undef, i1 false)
+
+ %I32 = call i32 @llvm.abs.i32(i32 undef, i1 false)
+ %V2I32 = call <2 x i32> @llvm.abs.v2i32(<2 x i32> undef, i1 false)
+ %V4I32 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> undef, i1 false)
+ %V8I32 = call <8 x i32> @llvm.abs.v8i32(<8 x i32> undef, i1 false)
+ %V9I32 = call <9 x i32> @llvm.abs.v9i32(<9 x i32> undef, i1 false)
+ %V16I32 = call <16 x i32> @llvm.abs.v16i32(<16 x i32> undef, i1 false)
+
+ %I16 = call i16 @llvm.abs.i16(i16 undef, i1 false)
+ %V2I16 = call <2 x i16> @llvm.abs.v2i16(<2 x i16> undef, i1 false)
+ %V4I16 = call <4 x i16> @llvm.abs.v4i16(<4 x i16> undef, i1 false)
+ %V8I16 = call <8 x i16> @llvm.abs.v8i16(<8 x i16> undef, i1 false)
+ %V16I16 = call <16 x i16> @llvm.abs.v16i16(<16 x i16> undef, i1 false)
+ %V17I16 = call <17 x i16> @llvm.abs.v17i16(<17 x i16> undef, i1 false)
+ %V32I16 = call <32 x i16> @llvm.abs.v32i16(<32 x i16> undef, i1 false)
+
+ %I8 = call i8 @llvm.abs.i8(i8 undef, i1 false)
+ %V2I8 = call <2 x i8> @llvm.abs.v2i8(<2 x i8> undef, i1 false)
+ %V4I8 = call <4 x i8> @llvm.abs.v4i8(<4 x i8> undef, i1 false)
+ %V8I8 = call <8 x i8> @llvm.abs.v8i8(<8 x i8> undef, i1 false)
+ %V16I8 = call <16 x i8> @llvm.abs.v16i8(<16 x i8> undef, i1 false)
+ %V32I8 = call <32 x i8> @llvm.abs.v32i8(<32 x i8> undef, i1 false)
+ %V33I8 = call <33 x i8> @llvm.abs.v33i8(<33 x i8> undef, i1 false)
+ %V64I8 = call <64 x i8> @llvm.abs.v64i8(<64 x i8> undef, i1 false)
+
+ ret i32 undef
+}
+
+define i32 @abs_poison(i32 %arg) {
+; FAST-LABEL: 'abs_poison'
+; FAST-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = call i64 @llvm.abs.i64(i64 undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = call <2 x i64> @llvm.abs.v2i64(<2 x i64> undef, i1 true)
+; FAST-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4I64 = call <4 x i64> ...
[truncated]
|
330c0e2
to
df2b6b7
Compare
Merge activity
|
9876e76
to
e7d5b89
Compare
6561bfd
to
cc1245c
Compare
Base automatically changed from
users/arsenm/amdgpu-add-slp-vectorize-test-integer-min-max
to
main
July 25, 2024 20:45
df2b6b7
to
eb121f6
Compare
eb121f6
to
c9b0504
Compare
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
backend:AMDGPU
llvm:analysis
Includes value tracking, cost tables and constant folding
llvm:transforms
vectorizers
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.