-
Notifications
You must be signed in to change notification settings - Fork 14.3k
R600: Expand is_fpclass #135234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
R600: Expand is_fpclass #135234
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm). Changes: Fixes #135083. Full diff: https://github.com/llvm/llvm-project/pull/135234.diff — 2 files affected:
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 157ca4b08020a..aa025c5307226 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -100,6 +100,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f32, Expand);
+ setOperationAction(ISD::IS_FPCLASS,
+ {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
+ MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
+ Expand);
+
setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
MVT::f64, Custom);
diff --git a/llvm/test/CodeGen/AMDGPU/r600.llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/r600.llvm.is.fpclass.ll
new file mode 100644
index 0000000000000..43ea3f80848db
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/r600.llvm.is.fpclass.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefix=CM %s
+
+; Checks the Cayman output for llvm.is.fpclass.f32 with mask 3 (the nan test).
+; The expected code clears the sign bit (AND with 2147483647) and then does a
+; signed-integer compare of that value against 2139095040, the +inf bit pattern.
+define amdgpu_kernel void @isnan_f32(ptr addrspace(1) %out, float %x) {
+; CM-LABEL: isnan_f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.x,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.X, PV.W, 1,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call i1 @llvm.is.fpclass.f32(float %x, i32 3) ; nan
+ %zext = zext i1 %result to i32
+ store i32 %zext, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; Scalar case for issue #135083 (going by the function name): llvm.is.fpclass.f32
+; with mask 504 (0x1f8, i.e. mask bits 3 through 8). The expected code clears the
+; sign bit and checks that 2139095040 (the +inf bit pattern) is greater than the
+; result, using a signed-integer compare with the operands swapped relative to
+; the nan test.
+define amdgpu_kernel void @issue135083_f32(ptr addrspace(1) %out, float %x) {
+; CM-LABEL: issue135083_f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT * T0.W, literal.x, PV.W,
+; CM-NEXT: 2139095040(INF), 0(0.000000e+00)
+; CM-NEXT: AND_INT * T0.X, PV.W, 1,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call i1 @llvm.is.fpclass.f32(float %x, i32 504)
+ %zext = zext i1 %result to i32
+ store i32 %zext, ptr addrspace(1) %out, align 4
+ ret void
+}
+
+; <2 x float> variant of the mask-504 test. The expected code handles each
+; element separately: clear the sign bit, SETGT_INT against 2139095039, then a
+; CNDE_INT choosing between 1 and 0.0 for the zero-extended result lane.
+define amdgpu_kernel void @issue135083_v2f32(ptr addrspace(1) %out, <2 x float> %x) {
+; CM-LABEL: issue135083_v2f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT * T0.W, KC0[3].X, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
+; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T0.Y, PV.W, 1, 0.0,
+; CM-NEXT: SETGT_INT * T0.W, PV.Z, literal.x,
+; CM-NEXT: 2139095039(3.402823e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call <2 x i1> @llvm.is.fpclass.v2f32(<2 x float> %x, i32 504)
+ %zext = zext <2 x i1> %result to <2 x i32>
+ store <2 x i32> %zext, ptr addrspace(1) %out, align 8
+ ret void
+}
+
+; <3 x float> variant of the mask-504 test. Each element goes through the same
+; AND_INT / SETGT_INT / CNDE_INT sequence; the expected output writes the
+; result with two store instructions, the second address being %out + 8.
+define amdgpu_kernel void @issue135083_v3f32(ptr addrspace(1) %out, <3 x float> %x) {
+; CM-LABEL: issue135083_v3f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 17, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T3.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT * T0.W, KC0[3].W, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.Z, KC0[3].Z, literal.x,
+; CM-NEXT: SETGT_INT * T0.W, PV.W, literal.y,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T0.X, PV.W, 1, 0.0,
+; CM-NEXT: AND_INT T0.Y, KC0[3].Y, literal.x,
+; CM-NEXT: SETGT_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T1.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT * T0.W, PV.Y, literal.y,
+; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT * T2.X, PV.W, 1, 0.0,
+; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call <3 x i1> @llvm.is.fpclass.v3f32(<3 x float> %x, i32 504)
+ %zext = zext <3 x i1> %result to <3 x i32>
+ store <3 x i32> %zext, ptr addrspace(1) %out, align 16
+ ret void
+}
+
+; <4 x float> variant of the mask-504 test. Each of the four elements is
+; expected to go through AND_INT (clear sign bit), SETGT_INT against
+; 2139095039, and a CNDE_INT choosing between 1 and 0.0; the result is written
+; with a single store. The intrinsic is spelled with the .v4f32 suffix so the
+; mangled name matches the <4 x float> operand type.
+define amdgpu_kernel void @issue135083_v4f32(ptr addrspace(1) %out, <4 x float> %x) {
+; CM-LABEL: issue135083_v4f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 18, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT * T0.W, KC0[4].X, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, KC0[3].W, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: AND_INT T0.Y, KC0[3].Z, literal.x,
+; CM-NEXT: SETGT_INT T1.Z, PV.W, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, PV.Z, 1, 0.0,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: AND_INT T1.Y, KC0[3].Y, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT * T1.W, PV.Y, literal.y,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T0.Y, PV.W, 1, 0.0,
+; CM-NEXT: SETGT_INT * T1.W, PV.Y, literal.x,
+; CM-NEXT: 2139095039(3.402823e+38), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call <4 x i1> @llvm.is.fpclass.v4f32(<4 x float> %x, i32 504)
+ %zext = zext <4 x i1> %result to <4 x i32>
+ store <4 x i32> %zext, ptr addrspace(1) %out, align 16
+ ret void
+}
+
+; <8 x float> variant of the mask-504 test. Each element is expected to go
+; through the AND_INT / SETGT_INT (against 2139095039) / CNDE_INT sequence, and
+; the result is written with two stores (the second address is %out + 16).
+; The intrinsic is spelled with the .v8f32 suffix so the mangled name matches
+; the <8 x float> operand type.
+define amdgpu_kernel void @issue135083_v8f32(ptr addrspace(1) %out, <8 x float> %x) {
+; CM-LABEL: issue135083_v8f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 34, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT T0.Z, KC0[6].X, literal.x,
+; CM-NEXT: AND_INT * T0.W, KC0[4].W, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT T0.X, KC0[4].Y, literal.x,
+; CM-NEXT: SETGT_INT T0.Y, PV.W, literal.y,
+; CM-NEXT: SETGT_INT T0.Z, PV.Z, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[5].W, literal.x,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: AND_INT T1.X, KC0[5].Z, literal.x,
+; CM-NEXT: SETGT_INT T1.Y, PV.W, literal.y,
+; CM-NEXT: AND_INT T1.Z, KC0[5].X, literal.x,
+; CM-NEXT: CNDE_INT * T1.W, PV.Z, 1, 0.0,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: SETGT_INT T2.X, PV.Z, literal.x,
+; CM-NEXT: AND_INT T2.Y, KC0[5].Y, literal.y,
+; CM-NEXT: CNDE_INT T1.Z, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT * T0.W, PV.X, literal.x,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: AND_INT T3.X, KC0[4].Z, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.W, 1, 0.0,
+; CM-NEXT: SETGT_INT T0.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T0.W, PV.X, 1, 0.0,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T1.X, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT T2.Y, PV.X, literal.x,
+; CM-NEXT: CNDE_INT T0.Z, T0.Y, 1, 0.0,
+; CM-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 16(2.242078e-44)
+; CM-NEXT: LSHR T2.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T0.Y, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT * T2.W, T0.X, literal.y,
+; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT * T0.X, PV.W, 1, 0.0,
+; CM-NEXT: LSHR * T3.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call <8 x i1> @llvm.is.fpclass.v8f32(<8 x float> %x, i32 504)
+ %zext = zext <8 x i1> %result to <8 x i32>
+ store <8 x i32> %zext, ptr addrspace(1) %out, align 32
+ ret void
+}
+
+; <16 x float> variant of the mask-504 test. Each element is expected to go
+; through the AND_INT / SETGT_INT (against 2139095039) / CNDE_INT sequence, and
+; the result is written with four stores (at %out, %out + 16, %out + 32 and
+; %out + 48). The intrinsic is spelled with the .v16f32 suffix so the mangled
+; name matches the <16 x float> operand type.
+define amdgpu_kernel void @issue135083_v16f32(ptr addrspace(1) %out, <16 x float> %x) {
+; CM-LABEL: issue135083_v16f32:
+; CM: ; %bb.0:
+; CM-NEXT: ALU 69, @6, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1, T7.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2, T0.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T6.X
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 6:
+; CM-NEXT: AND_INT T0.Z, KC0[6].Y, literal.x,
+; CM-NEXT: AND_INT * T0.W, KC0[6].W, literal.x,
+; CM-NEXT: 2147483647(nan), 0(0.000000e+00)
+; CM-NEXT: SETGT_INT T0.X, PV.W, literal.x,
+; CM-NEXT: AND_INT T0.Y, KC0[6].Z, literal.y,
+; CM-NEXT: AND_INT T1.Z, KC0[7].Y, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[7].X, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: SETGT_INT T1.X, PV.W, literal.x,
+; CM-NEXT: AND_INT T1.Y, KC0[7].Z, literal.y,
+; CM-NEXT: AND_INT T2.Z, KC0[8].X, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[7].W, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: SETGT_INT T2.X, PV.W, literal.x,
+; CM-NEXT: SETGT_INT T2.Y, PV.Z, literal.x,
+; CM-NEXT: AND_INT T2.Z, KC0[10].X, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[8].W, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: AND_INT T3.X, KC0[8].Y, literal.x,
+; CM-NEXT: SETGT_INT T3.Y, PV.W, literal.y,
+; CM-NEXT: SETGT_INT T2.Z, PV.Z, literal.y,
+; CM-NEXT: AND_INT * T0.W, KC0[9].W, literal.x,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: AND_INT T4.X, KC0[9].Z, literal.x,
+; CM-NEXT: SETGT_INT T4.Y, PV.W, literal.y,
+; CM-NEXT: AND_INT T3.Z, KC0[9].X, literal.x,
+; CM-NEXT: CNDE_INT * T4.W, PV.Z, 1, 0.0,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: SETGT_INT T5.X, PV.Z, literal.x,
+; CM-NEXT: AND_INT T5.Y, KC0[9].Y, literal.y,
+; CM-NEXT: CNDE_INT T4.Z, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT * T0.W, PV.X, literal.x,
+; CM-NEXT: 2139095039(3.402823e+38), 2147483647(nan)
+; CM-NEXT: AND_INT T6.X, KC0[8].Z, literal.x,
+; CM-NEXT: CNDE_INT T4.Y, PV.W, 1, 0.0,
+; CM-NEXT: SETGT_INT T2.Z, PV.Y, literal.y,
+; CM-NEXT: CNDE_INT * T3.W, PV.X, 1, 0.0,
+; CM-NEXT: 2147483647(nan), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T4.X, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT T5.Y, PV.X, literal.x,
+; CM-NEXT: CNDE_INT T3.Z, T3.Y, 1, 0.0,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 48(6.726233e-44)
+; CM-NEXT: LSHR T5.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T3.Y, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT T2.Z, T3.X, literal.y,
+; CM-NEXT: CNDE_INT * T2.W, T2.Y, 1, 0.0,
+; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T3.X, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT T1.Y, T1.Y, literal.x,
+; CM-NEXT: CNDE_INT T2.Z, T2.X, 1, 0.0,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 32(4.484155e-44)
+; CM-NEXT: LSHR T6.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T2.Y, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT T1.Z, T1.Z, literal.y,
+; CM-NEXT: CNDE_INT * T1.W, T1.X, 1, 0.0,
+; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT T2.X, PV.Z, 1, 0.0,
+; CM-NEXT: SETGT_INT T0.Y, T0.Y, literal.x,
+; CM-NEXT: CNDE_INT T1.Z, T0.X, 1, 0.0,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
+; CM-NEXT: 2139095039(3.402823e+38), 16(2.242078e-44)
+; CM-NEXT: LSHR T0.X, PV.W, literal.x,
+; CM-NEXT: CNDE_INT T1.Y, PV.Y, 1, 0.0,
+; CM-NEXT: SETGT_INT * T0.W, T0.Z, literal.y,
+; CM-NEXT: 2(2.802597e-45), 2139095039(3.402823e+38)
+; CM-NEXT: CNDE_INT * T1.X, PV.W, 1, 0.0,
+; CM-NEXT: LSHR * T7.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+ %result = call <16 x i1> @llvm.is.fpclass.v16f32(<16 x float> %x, i32 504)
+ %zext = zext <16 x i1> %result to <16 x i32>
+ store <16 x i32> %zext, ptr addrspace(1) %out, align 64
+ ret void
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
WOW, R600 related changes…
@@ -100,6 +100,11 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, | |||
|
|||
setOperationAction(ISD::FSUB, MVT::f32, Expand); | |||
|
|||
setOperationAction(ISD::IS_FPCLASS, | |||
{MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it really support this many data types?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Technically only need to set this for the minimum set of legal types, but it doesn't hurt to set it on illegal types that already defaulted to expand
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It turns out this is also working around some kind of bug in select / select_cc handling so I'm going to leave it alone
d43e6e1
to
2aac6b4
Compare
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/10395 — here is the relevant piece of the build log for reference:
|
Fixes #135083