Skip to content

Commit b6f32ad

Browse files
authored
[NVPTX] Switch to untyped float registers (#137011)
Register types in PTX are simply syntactic sugar, and emitting them has added a lot of unnecessary complexity to the NVPTX backend. This change takes the first step toward their removal by using ".b" registers instead of ".f" in all cases. This should shake out any potential issues or bugs in ptxas that would prevent full removal, and it front-loads many of the required test updates.
1 parent 0e0a166 commit b6f32ad

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+523
-523
lines changed

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ using namespace llvm;
2626
namespace llvm {
2727
StringRef getNVPTXRegClassName(TargetRegisterClass const *RC) {
2828
if (RC == &NVPTX::Float32RegsRegClass)
29-
return ".f32";
29+
return ".b32";
3030
if (RC == &NVPTX::Float64RegsRegClass)
31-
return ".f64";
31+
return ".b64";
3232
if (RC == &NVPTX::Int128RegsRegClass)
3333
return ".b128";
3434
if (RC == &NVPTX::Int64RegsRegClass)

llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ define half @fh(ptr %p) {
4545
; ENABLED-LABEL: fh(
4646
; ENABLED: {
4747
; ENABLED-NEXT: .reg .b16 %rs<10>;
48-
; ENABLED-NEXT: .reg .f32 %f<13>;
48+
; ENABLED-NEXT: .reg .b32 %f<13>;
4949
; ENABLED-NEXT: .reg .b64 %rd<2>;
5050
; ENABLED-EMPTY:
5151
; ENABLED-NEXT: // %bb.0:
@@ -74,7 +74,7 @@ define half @fh(ptr %p) {
7474
; DISABLED-LABEL: fh(
7575
; DISABLED: {
7676
; DISABLED-NEXT: .reg .b16 %rs<10>;
77-
; DISABLED-NEXT: .reg .f32 %f<13>;
77+
; DISABLED-NEXT: .reg .b32 %f<13>;
7878
; DISABLED-NEXT: .reg .b64 %rd<2>;
7979
; DISABLED-EMPTY:
8080
; DISABLED-NEXT: // %bb.0:
@@ -121,7 +121,7 @@ define half @fh(ptr %p) {
121121
define float @ff(ptr %p) {
122122
; ENABLED-LABEL: ff(
123123
; ENABLED: {
124-
; ENABLED-NEXT: .reg .f32 %f<10>;
124+
; ENABLED-NEXT: .reg .b32 %f<10>;
125125
; ENABLED-NEXT: .reg .b64 %rd<2>;
126126
; ENABLED-EMPTY:
127127
; ENABLED-NEXT: // %bb.0:
@@ -137,7 +137,7 @@ define float @ff(ptr %p) {
137137
;
138138
; DISABLED-LABEL: ff(
139139
; DISABLED: {
140-
; DISABLED-NEXT: .reg .f32 %f<10>;
140+
; DISABLED-NEXT: .reg .b32 %f<10>;
141141
; DISABLED-NEXT: .reg .b64 %rd<2>;
142142
; DISABLED-EMPTY:
143143
; DISABLED-NEXT: // %bb.0:

llvm/test/CodeGen/NVPTX/and-or-setcc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ define i1 @and_ord(float %a, float %b) {
99
; CHECK: {
1010
; CHECK-NEXT: .reg .pred %p<2>;
1111
; CHECK-NEXT: .reg .b32 %r<2>;
12-
; CHECK-NEXT: .reg .f32 %f<3>;
12+
; CHECK-NEXT: .reg .b32 %f<3>;
1313
; CHECK-EMPTY:
1414
; CHECK-NEXT: // %bb.0:
1515
; CHECK-NEXT: ld.param.f32 %f1, [and_ord_param_0];
@@ -29,7 +29,7 @@ define i1 @or_uno(float %a, float %b) {
2929
; CHECK: {
3030
; CHECK-NEXT: .reg .pred %p<2>;
3131
; CHECK-NEXT: .reg .b32 %r<2>;
32-
; CHECK-NEXT: .reg .f32 %f<3>;
32+
; CHECK-NEXT: .reg .b32 %f<3>;
3333
; CHECK-EMPTY:
3434
; CHECK-NEXT: // %bb.0:
3535
; CHECK-NEXT: ld.param.f32 %f1, [or_uno_param_0];

llvm/test/CodeGen/NVPTX/atomics.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p0(ptr %addr, float %val)
351351
define float @atomic_add_f32_generic(ptr %addr, float %val) {
352352
; CHECK-LABEL: atomic_add_f32_generic(
353353
; CHECK: {
354-
; CHECK-NEXT: .reg .f32 %f<3>;
354+
; CHECK-NEXT: .reg .b32 %f<3>;
355355
; CHECK-NEXT: .reg .b64 %rd<2>;
356356
; CHECK-EMPTY:
357357
; CHECK-NEXT: // %bb.0:
@@ -370,7 +370,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p1(ptr addrspace(1) %addr, float %v
370370
define float @atomic_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
371371
; CHECK-LABEL: atomic_add_f32_addrspace1(
372372
; CHECK: {
373-
; CHECK-NEXT: .reg .f32 %f<3>;
373+
; CHECK-NEXT: .reg .b32 %f<3>;
374374
; CHECK-NEXT: .reg .b64 %rd<2>;
375375
; CHECK-EMPTY:
376376
; CHECK-NEXT: // %bb.0:
@@ -389,7 +389,7 @@ declare float @llvm.nvvm.atomic.load.add.f32.p3(ptr addrspace(3) %addr, float %v
389389
define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
390390
; CHECK-LABEL: atomic_add_f32_addrspace3(
391391
; CHECK: {
392-
; CHECK-NEXT: .reg .f32 %f<3>;
392+
; CHECK-NEXT: .reg .b32 %f<3>;
393393
; CHECK-NEXT: .reg .b64 %rd<2>;
394394
; CHECK-EMPTY:
395395
; CHECK-NEXT: // %bb.0:
@@ -406,7 +406,7 @@ define float @atomic_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
406406
define float @atomicrmw_add_f32_generic(ptr %addr, float %val) {
407407
; CHECK-LABEL: atomicrmw_add_f32_generic(
408408
; CHECK: {
409-
; CHECK-NEXT: .reg .f32 %f<3>;
409+
; CHECK-NEXT: .reg .b32 %f<3>;
410410
; CHECK-NEXT: .reg .b64 %rd<2>;
411411
; CHECK-EMPTY:
412412
; CHECK-NEXT: // %bb.0:
@@ -426,7 +426,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
426426
; CHECK-NEXT: .reg .pred %p<2>;
427427
; CHECK-NEXT: .reg .b16 %rs<5>;
428428
; CHECK-NEXT: .reg .b32 %r<17>;
429-
; CHECK-NEXT: .reg .f32 %f<4>;
429+
; CHECK-NEXT: .reg .b32 %f<4>;
430430
; CHECK-NEXT: .reg .b64 %rd<3>;
431431
; CHECK-EMPTY:
432432
; CHECK-NEXT: // %bb.0:
@@ -470,7 +470,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
470470
define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
471471
; CHECK-LABEL: atomicrmw_add_f32_addrspace1(
472472
; CHECK: {
473-
; CHECK-NEXT: .reg .f32 %f<3>;
473+
; CHECK-NEXT: .reg .b32 %f<3>;
474474
; CHECK-NEXT: .reg .b64 %rd<2>;
475475
; CHECK-EMPTY:
476476
; CHECK-NEXT: // %bb.0:
@@ -487,7 +487,7 @@ define float @atomicrmw_add_f32_addrspace1(ptr addrspace(1) %addr, float %val) {
487487
define float @atomicrmw_add_f32_addrspace3(ptr addrspace(3) %addr, float %val) {
488488
; CHECK-LABEL: atomicrmw_add_f32_addrspace3(
489489
; CHECK: {
490-
; CHECK-NEXT: .reg .f32 %f<3>;
490+
; CHECK-NEXT: .reg .b32 %f<3>;
491491
; CHECK-NEXT: .reg .b64 %rd<2>;
492492
; CHECK-EMPTY:
493493
; CHECK-NEXT: // %bb.0:

0 commit comments

Comments
 (0)