-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[NVPTX] Switch to untyped float registers #137011
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NVPTX] Switch to untyped float registers #137011
Conversation
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-backend-nvptx Author: Alex MacLean (AlexMaclean) Changes: Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates. Patch is 190.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137011.diff 44 Files Affected:
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 142388893082a..6b9797c3e6aae 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -26,9 +26,9 @@ using namespace llvm;
namespace llvm {
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass)
- return ".f32";
+ return ".b32";
if (RC == &NVPTX::Float64RegsRegClass)
- return ".f64";
+ return ".b64";
if (RC == &NVPTX::Int128RegsRegClass)
return ".b128";
if (RC == &NVPTX::Int64RegsRegClass)
diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
index e46657e4a582f..8f0964c2d5eba 100644
--- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
+++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll
@@ -45,7 +45,7 @@ define half @fh(ptr %p) {
; ENABLED-LABEL: fh(
; ENABLED: {
; ENABLED-NEXT: .reg .b16 %rs<10>;
-; ENABLED-NEXT: .reg .f32 %f<13>;
+; ENABLED-NEXT: .reg .b32 %f<13>;
; ENABLED-NEXT: .reg .b64 %rd<2>;
; ENABLED-EMPTY:
; ENABLED-NEXT: // %bb.0:
@@ -74,7 +74,7 @@ define half @fh(ptr %p) {
; DISABLED-LABEL: fh(
; DISABLED: {
; DISABLED-NEXT: .reg .b16 %rs<10>;
-; DISABLED-NEXT: .reg .f32 %f<13>;
+; DISABLED-NEXT: .reg .b32 %f<13>;
; DISABLED-NEXT: .reg .b64 %rd<2>;
; DISABLED-EMPTY:
; DISABLED-NEXT: // %bb.0:
@@ -121,7 +121,7 @@ define half @fh(ptr %p) {
define float @ff(ptr %p) {
; ENABLED-LABEL: ff(
; ENABLED: {
-; ENABLED-NEXT: .reg .f32 %f<10>;
+; ENABLED-NEXT: .reg .b32 %f<10>;
; ENABLED-NEXT: .reg .b64 %rd<2>;
; ENABLED-EMPTY:
; ENABLED-NEXT: // %bb.0:
@@ -137,7 +137,7 @@ define float @ff(ptr %p) {
;
; DISABLED-LABEL: ff(
; DISABLED: {
-; DISABLED-NEXT: .reg .f32 %f<10>;
+; DISABLED-NEXT: .reg .b32 %f<10>;
; DISABLED-NEXT: .reg .b64 %rd<2>;
; DISABLED-EMPTY:
; DISABLED-NEXT: // %bb.0:
diff --git a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
index 6c3514c1ad946..5949de335b8cf 100644
--- a/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/and-or-setcc.ll
@@ -9,7 +9,7 @@ define i1 @and_ord(float %a, float %b) {
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
@@ -29,7 +29,7 @@ define i1 @or_uno(float %a, float %b) {
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index bb04aa856d656..16de80d55a054 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -351,7 +351,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
define float @atomic_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_generic(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -370,7 +370,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %v
define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace1(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -389,7 +389,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %v
define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomic_add_f32_addrspace3(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -406,7 +406,7 @@ define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_generic(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -426,7 +426,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<5>;
; CHECK-NEXT: .reg .b32 %r<17>;
-; CHECK-NEXT: .reg .f32 %f<4>;
+; CHECK-NEXT: .reg .b32 %f<4>;
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -470,7 +470,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace1(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
@@ -487,7 +487,7 @@ define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
; CHECK-LABEL: atomicrmw_add_f32_addrspace3(
; CHECK: {
-; CHECK-NEXT: .reg .f32 %f<3>;
+; CHECK-NEXT: .reg .b32 %f<3>;
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index b97cb6fa3cbe4..6be13c3a6fdec 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -19,7 +19,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<11>;
-; SM70-NEXT: .reg .f32 %f<4>;
+; SM70-NEXT: .reg .b32 %f<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fadd_param_1];
@@ -55,7 +55,7 @@ define bfloat @test_fadd(bfloat %0, bfloat %1) {
; SM80-FTZ-LABEL: test_fadd(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<4>;
+; SM80-FTZ-NEXT: .reg .b32 %f<4>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fadd_param_0];
@@ -87,7 +87,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<11>;
-; SM70-NEXT: .reg .f32 %f<4>;
+; SM70-NEXT: .reg .b32 %f<4>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fsub_param_1];
@@ -123,7 +123,7 @@ define bfloat @test_fsub(bfloat %0, bfloat %1) {
; SM80-FTZ-LABEL: test_fsub(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<4>;
+; SM80-FTZ-NEXT: .reg .b32 %f<4>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fsub_param_0];
@@ -155,7 +155,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<5>;
; SM70-NEXT: .reg .b32 %r<24>;
-; SM70-NEXT: .reg .f32 %f<7>;
+; SM70-NEXT: .reg .b32 %f<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
@@ -210,7 +210,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<5>;
; SM80-FTZ-NEXT: .reg .b32 %r<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<7>;
+; SM80-FTZ-NEXT: .reg .b32 %f<7>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
@@ -247,7 +247,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<5>;
; SM70-NEXT: .reg .b32 %r<24>;
-; SM70-NEXT: .reg .f32 %f<7>;
+; SM70-NEXT: .reg .b32 %f<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
@@ -302,7 +302,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<5>;
; SM80-FTZ-NEXT: .reg .b32 %r<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<7>;
+; SM80-FTZ-NEXT: .reg .b32 %f<7>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
@@ -339,7 +339,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<5>;
; SM70-NEXT: .reg .b32 %r<24>;
-; SM70-NEXT: .reg .f32 %f<7>;
+; SM70-NEXT: .reg .b32 %f<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
@@ -394,7 +394,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<5>;
; SM80-FTZ-NEXT: .reg .b32 %r<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<7>;
+; SM80-FTZ-NEXT: .reg .b32 %f<7>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
@@ -431,7 +431,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<5>;
; SM70-NEXT: .reg .b32 %r<24>;
-; SM70-NEXT: .reg .f32 %f<7>;
+; SM70-NEXT: .reg .b32 %f<7>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
@@ -474,7 +474,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM80: {
; SM80-NEXT: .reg .b16 %rs<5>;
; SM80-NEXT: .reg .b32 %r<4>;
-; SM80-NEXT: .reg .f32 %f<7>;
+; SM80-NEXT: .reg .b32 %f<7>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
@@ -495,7 +495,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<5>;
; SM80-FTZ-NEXT: .reg .b32 %r<4>;
-; SM80-FTZ-NEXT: .reg .f32 %f<7>;
+; SM80-FTZ-NEXT: .reg .b32 %f<7>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
@@ -516,7 +516,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; SM90: {
; SM90-NEXT: .reg .b16 %rs<5>;
; SM90-NEXT: .reg .b32 %r<4>;
-; SM90-NEXT: .reg .f32 %f<7>;
+; SM90-NEXT: .reg .b32 %f<7>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
@@ -566,7 +566,7 @@ define float @test_fpext_float(bfloat %a) #0 {
; SM70-LABEL: test_fpext_float(
; SM70: {
; SM70-NEXT: .reg .b32 %r<3>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fpext_float_param_0];
@@ -578,7 +578,7 @@ define float @test_fpext_float(bfloat %a) #0 {
; SM80-LABEL: test_fpext_float(
; SM80: {
; SM80-NEXT: .reg .b16 %rs<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0];
@@ -589,7 +589,7 @@ define float @test_fpext_float(bfloat %a) #0 {
; SM80-FTZ-LABEL: test_fpext_float(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0];
@@ -600,7 +600,7 @@ define float @test_fpext_float(bfloat %a) #0 {
; SM90-LABEL: test_fpext_float(
; SM90: {
; SM90-NEXT: .reg .b16 %rs<2>;
-; SM90-NEXT: .reg .f32 %f<2>;
+; SM90-NEXT: .reg .b32 %f<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.b16 %rs1, [test_fpext_float_param_0];
@@ -617,7 +617,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<7>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0];
@@ -635,7 +635,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM80-LABEL: test_fptrunc_float(
; SM80: {
; SM80-NEXT: .reg .b16 %rs<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0];
@@ -646,7 +646,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM80-FTZ-LABEL: test_fptrunc_float(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0];
@@ -657,7 +657,7 @@ define bfloat @test_fptrunc_float(float %a) #0 {
; SM90-LABEL: test_fptrunc_float(
; SM90: {
; SM90-NEXT: .reg .b16 %rs<2>;
-; SM90-NEXT: .reg .f32 %f<2>;
+; SM90-NEXT: .reg .b32 %f<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
; SM90-NEXT: ld.param.f32 %f1, [test_fptrunc_float_param_0];
@@ -674,7 +674,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<9>;
-; SM70-NEXT: .reg .f32 %f<3>;
+; SM70-NEXT: .reg .b32 %f<3>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fadd_imm_1_param_0];
@@ -706,7 +706,7 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
; SM80-FTZ-LABEL: test_fadd_imm_1(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT: .reg .f32 %f<3>;
+; SM80-FTZ-NEXT: .reg .b32 %f<3>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fadd_imm_1_param_0];
@@ -735,7 +735,7 @@ define bfloat @test_select_cc_bf16_f64(double %a, double %b, bfloat %c, bfloat %
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<4>;
-; CHECK-NEXT: .reg .f64 %fd<3>;
+; CHECK-NEXT: .reg .b64 %fd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.f64 %fd1, [test_select_cc_bf16_f64_param_0];
@@ -756,7 +756,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM70: {
; SM70-NEXT: .reg .b16 %rs<9>;
; SM70-NEXT: .reg .b32 %r<21>;
-; SM70-NEXT: .reg .f32 %f<9>;
+; SM70-NEXT: .reg .b32 %f<9>;
; SM70-NEXT: .reg .b64 %rd<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
@@ -798,7 +798,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80: {
; SM80-NEXT: .reg .b16 %rs<9>;
; SM80-NEXT: .reg .b32 %r<5>;
-; SM80-NEXT: .reg .f32 %f<9>;
+; SM80-NEXT: .reg .b32 %f<9>;
; SM80-NEXT: .reg .b64 %rd<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
@@ -824,7 +824,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<9>;
; SM80-FTZ-NEXT: .reg .b32 %r<5>;
-; SM80-FTZ-NEXT: .reg .f32 %f<9>;
+; SM80-FTZ-NEXT: .reg .b32 %f<9>;
; SM80-FTZ-NEXT: .reg .b64 %rd<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
@@ -850,7 +850,7 @@ define <8 x float> @test_extload_bf16x8(ptr addrspace(3) noundef %arg) #0 {
; SM90: {
; SM90-NEXT: .reg .b16 %rs<9>;
; SM90-NEXT: .reg .b32 %r<5>;
-; SM90-NEXT: .reg .f32 %f<9>;
+; SM90-NEXT: .reg .b32 %f<9>;
; SM90-NEXT: .reg .b64 %rd<2>;
; SM90-EMPTY:
; SM90-NEXT: // %bb.0:
@@ -881,7 +881,7 @@ define i16 @test_fptosi_i16(bfloat %a) {
; SM70: {
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fptosi_i16_param_0];
@@ -896,7 +896,7 @@ define i16 @test_fptosi_i16(bfloat %a) {
; SM80: {
; SM80-NEXT: .reg .b16 %rs<3>;
; SM80-NEXT: .reg .b32 %r<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
@@ -910,7 +910,7 @@ define i16 @test_fptosi_i16(bfloat %a) {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
; SM80-FTZ-NEXT: .reg .b32 %r<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fptosi_i16_param_0];
@@ -940,7 +940,7 @@ define i16 @test_fptoui_i16(bfloat %a) {
; SM70: {
; SM70-NEXT: .reg .b16 %rs<2>;
; SM70-NEXT: .reg .b32 %r<4>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %r1, [test_fptoui_i16_param_0];
@@ -955,7 +955,7 @@ define i16 @test_fptoui_i16(bfloat %a) {
; SM80: {
; SM80-NEXT: .reg .b16 %rs<3>;
; SM80-NEXT: .reg .b32 %r<2>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
@@ -969,7 +969,7 @@ define i16 @test_fptoui_i16(bfloat %a) {
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
; SM80-FTZ-NEXT: .reg .b32 %r<2>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.b16 %rs1, [test_fptoui_i16_param_0];
@@ -1000,7 +1000,7 @@ define bfloat @test_sitofp_i16(i16 %a) {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b32 %r<7>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0];
@@ -1019,7 +1019,7 @@ define bfloat @test_sitofp_i16(i16 %a) {
; SM80-LABEL: test_sitofp_i16(
; SM80: {
; SM80-NEXT: .reg .b16 %rs<3>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0];
@@ -1031,7 +1031,7 @@ define bfloat @test_sitofp_i16(i16 %a) {
; SM80-FTZ-LABEL: test_sitofp_i16(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.u16 %rs1, [test_sitofp_i16_param_0];
@@ -1059,7 +1059,7 @@ define bfloat @test_uitofp_i8(i8 %a) {
; SM70-NEXT: .reg .pred %p<2>;
; SM70-NEXT: .reg .b16 %rs<3>;
; SM70-NEXT: .reg .b32 %r<7>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0];
@@ -1078,7 +1078,7 @@ define bfloat @test_uitofp_i8(i8 %a) {
; SM80-LABEL: test_uitofp_i8(
; SM80: {
; SM80-NEXT: .reg .b16 %rs<3>;
-; SM80-NEXT: .reg .f32 %f<2>;
+; SM80-NEXT: .reg .b32 %f<2>;
; SM80-EMPTY:
; SM80-NEXT: // %bb.0:
; SM80-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0];
@@ -1090,7 +1090,7 @@ define bfloat @test_uitofp_i8(i8 %a) {
; SM80-FTZ-LABEL: test_uitofp_i8(
; SM80-FTZ: {
; SM80-FTZ-NEXT: .reg .b16 %rs<3>;
-; SM80-FTZ-NEXT: .reg .f32 %f<2>;
+; SM80-FTZ-NEXT: .reg .b32 %f<2>;
; SM80-FTZ-EMPTY:
; SM80-FTZ-NEXT: // %bb.0:
; SM80-FTZ-NEXT: ld.param.u8 %rs1, [test_uitofp_i8_param_0];
@@ -1118,7 +1118,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM70-NEXT: .reg .pred %p<3>;
; SM70-NEXT: .reg .b16 %rs<4>;
; SM70-NEXT: .reg .b32 %r<8>;
-; SM70-NEXT: .reg .f32 %f<2>;
+; SM70-NEXT: .reg .b32 %f<2>;
; SM70-EMPTY:
; SM70-NEXT: // %bb.0:
; SM70-NEXT: ld.param.u8 %rs1, [test_uitofp_i1_param_0];
@@ -1142,7 +1142,7 @@ define bfloat @test_uitofp_i1(i1 %a) {
; SM80-NEXT: .reg .pred %p<2>;
; SM80-NEXT: .reg .b16 %rs<4>;
; SM80-NEXT: .reg .b32...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excited to see this coming to fruition!
Why not get rid of the |
That is absolutely the goal, but I want to start with this very simple change. That way, if any issues arise, we can distinguish between a |
SGTM. |
fb9bf21
to
bf2766e
Compare
Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates.
Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates.
Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates.
Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates.
Register types in PTX are simply syntactic sugar and emitting them has added lots of unnecessary complexity to the NVPTX backend. This change takes the first step to their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas preventing full removal and pre-fetches many of the required test updates.