@@ -159,8 +159,8 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
159
159
; SM70-LABEL: test_faddx2(
160
160
; SM70: {
161
161
; SM70-NEXT: .reg .pred %p<3>;
162
- ; SM70-NEXT: .reg .b16 %rs<13>;
163
- ; SM70-NEXT: .reg .b32 %r<24>;
162
+ ; SM70-NEXT: .reg .b16 %rs<9>;
163
+ ; SM70-NEXT: .reg .b32 %r<25>;
164
164
; SM70-NEXT: .reg .f32 %f<7>;
165
165
; SM70-EMPTY:
166
166
; SM70-NEXT: // %bb.0:
@@ -182,7 +182,6 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
182
182
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
183
183
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
184
184
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
185
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
186
185
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
187
186
; SM70-NEXT: shl.b32 %r14, %r13, 16;
188
187
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -197,8 +196,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
197
196
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
198
197
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
199
198
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
200
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
201
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
199
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
202
200
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
203
201
; SM70-NEXT: ret;
204
202
;
@@ -262,8 +260,8 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
262
260
; SM70-LABEL: test_fsubx2(
263
261
; SM70: {
264
262
; SM70-NEXT: .reg .pred %p<3>;
265
- ; SM70-NEXT: .reg .b16 %rs<13>;
266
- ; SM70-NEXT: .reg .b32 %r<24>;
263
+ ; SM70-NEXT: .reg .b16 %rs<9>;
264
+ ; SM70-NEXT: .reg .b32 %r<25>;
267
265
; SM70-NEXT: .reg .f32 %f<7>;
268
266
; SM70-EMPTY:
269
267
; SM70-NEXT: // %bb.0:
@@ -285,7 +283,6 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
285
283
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
286
284
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
287
285
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
288
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
289
286
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
290
287
; SM70-NEXT: shl.b32 %r14, %r13, 16;
291
288
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -300,8 +297,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
300
297
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
301
298
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
302
299
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
303
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
304
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
300
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
305
301
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
306
302
; SM70-NEXT: ret;
307
303
;
@@ -365,8 +361,8 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
365
361
; SM70-LABEL: test_fmulx2(
366
362
; SM70: {
367
363
; SM70-NEXT: .reg .pred %p<3>;
368
- ; SM70-NEXT: .reg .b16 %rs<13>;
369
- ; SM70-NEXT: .reg .b32 %r<24>;
364
+ ; SM70-NEXT: .reg .b16 %rs<9>;
365
+ ; SM70-NEXT: .reg .b32 %r<25>;
370
366
; SM70-NEXT: .reg .f32 %f<7>;
371
367
; SM70-EMPTY:
372
368
; SM70-NEXT: // %bb.0:
@@ -388,7 +384,6 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
388
384
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
389
385
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
390
386
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
391
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
392
387
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
393
388
; SM70-NEXT: shl.b32 %r14, %r13, 16;
394
389
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -403,8 +398,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
403
398
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
404
399
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
405
400
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
406
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
407
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
401
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
408
402
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
409
403
; SM70-NEXT: ret;
410
404
;
@@ -468,8 +462,8 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
468
462
; SM70-LABEL: test_fdiv(
469
463
; SM70: {
470
464
; SM70-NEXT: .reg .pred %p<3>;
471
- ; SM70-NEXT: .reg .b16 %rs<13>;
472
- ; SM70-NEXT: .reg .b32 %r<24>;
465
+ ; SM70-NEXT: .reg .b16 %rs<9>;
466
+ ; SM70-NEXT: .reg .b32 %r<25>;
473
467
; SM70-NEXT: .reg .f32 %f<7>;
474
468
; SM70-EMPTY:
475
469
; SM70-NEXT: // %bb.0:
@@ -491,7 +485,6 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
491
485
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
492
486
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
493
487
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
494
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
495
488
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
496
489
; SM70-NEXT: shl.b32 %r14, %r13, 16;
497
490
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -506,8 +499,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
506
499
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
507
500
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
508
501
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
509
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
510
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
502
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
511
503
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
512
504
; SM70-NEXT: ret;
513
505
;
@@ -1706,8 +1698,8 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1706
1698
; SM70-LABEL: test_maxnum_v2(
1707
1699
; SM70: {
1708
1700
; SM70-NEXT: .reg .pred %p<3>;
1709
- ; SM70-NEXT: .reg .b16 %rs<13>;
1710
- ; SM70-NEXT: .reg .b32 %r<24>;
1701
+ ; SM70-NEXT: .reg .b16 %rs<9>;
1702
+ ; SM70-NEXT: .reg .b32 %r<25>;
1711
1703
; SM70-NEXT: .reg .f32 %f<7>;
1712
1704
; SM70-EMPTY:
1713
1705
; SM70-NEXT: // %bb.0:
@@ -1729,7 +1721,6 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1729
1721
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
1730
1722
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
1731
1723
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
1732
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
1733
1724
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
1734
1725
; SM70-NEXT: shl.b32 %r14, %r13, 16;
1735
1726
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -1744,8 +1735,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1744
1735
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
1745
1736
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
1746
1737
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
1747
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
1748
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
1738
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
1749
1739
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
1750
1740
; SM70-NEXT: ret;
1751
1741
;
0 commit comments