@@ -159,8 +159,8 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
159
159
; SM70-LABEL: test_faddx2(
160
160
; SM70: {
161
161
; SM70-NEXT: .reg .pred %p<3>;
162
- ; SM70-NEXT: .reg .b16 %rs<13>;
163
- ; SM70-NEXT: .reg .b32 %r<24>;
162
+ ; SM70-NEXT: .reg .b16 %rs<9>;
163
+ ; SM70-NEXT: .reg .b32 %r<25>;
164
164
; SM70-NEXT: .reg .f32 %f<7>;
165
165
; SM70-EMPTY:
166
166
; SM70-NEXT: // %bb.0:
@@ -182,7 +182,6 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
182
182
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
183
183
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
184
184
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
185
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
186
185
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
187
186
; SM70-NEXT: shl.b32 %r14, %r13, 16;
188
187
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -197,8 +196,7 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
197
196
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
198
197
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
199
198
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
200
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
201
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
199
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
202
200
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
203
201
; SM70-NEXT: ret;
204
202
;
@@ -266,8 +264,8 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
266
264
; SM70-LABEL: test_fsubx2(
267
265
; SM70: {
268
266
; SM70-NEXT: .reg .pred %p<3>;
269
- ; SM70-NEXT: .reg .b16 %rs<13>;
270
- ; SM70-NEXT: .reg .b32 %r<24>;
267
+ ; SM70-NEXT: .reg .b16 %rs<9>;
268
+ ; SM70-NEXT: .reg .b32 %r<25>;
271
269
; SM70-NEXT: .reg .f32 %f<7>;
272
270
; SM70-EMPTY:
273
271
; SM70-NEXT: // %bb.0:
@@ -289,7 +287,6 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
289
287
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
290
288
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
291
289
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
292
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
293
290
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
294
291
; SM70-NEXT: shl.b32 %r14, %r13, 16;
295
292
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -304,8 +301,7 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
304
301
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
305
302
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
306
303
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
307
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
308
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
304
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
309
305
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
310
306
; SM70-NEXT: ret;
311
307
;
@@ -373,8 +369,8 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
373
369
; SM70-LABEL: test_fmulx2(
374
370
; SM70: {
375
371
; SM70-NEXT: .reg .pred %p<3>;
376
- ; SM70-NEXT: .reg .b16 %rs<13>;
377
- ; SM70-NEXT: .reg .b32 %r<24>;
372
+ ; SM70-NEXT: .reg .b16 %rs<9>;
373
+ ; SM70-NEXT: .reg .b32 %r<25>;
378
374
; SM70-NEXT: .reg .f32 %f<7>;
379
375
; SM70-EMPTY:
380
376
; SM70-NEXT: // %bb.0:
@@ -396,7 +392,6 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
396
392
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
397
393
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
398
394
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
399
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
400
395
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
401
396
; SM70-NEXT: shl.b32 %r14, %r13, 16;
402
397
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -411,8 +406,7 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
411
406
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
412
407
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
413
408
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
414
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
415
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
409
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
416
410
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
417
411
; SM70-NEXT: ret;
418
412
;
@@ -480,8 +474,8 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
480
474
; SM70-LABEL: test_fdiv(
481
475
; SM70: {
482
476
; SM70-NEXT: .reg .pred %p<3>;
483
- ; SM70-NEXT: .reg .b16 %rs<13>;
484
- ; SM70-NEXT: .reg .b32 %r<24>;
477
+ ; SM70-NEXT: .reg .b16 %rs<9>;
478
+ ; SM70-NEXT: .reg .b32 %r<25>;
485
479
; SM70-NEXT: .reg .f32 %f<7>;
486
480
; SM70-EMPTY:
487
481
; SM70-NEXT: // %bb.0:
@@ -503,7 +497,6 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
503
497
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
504
498
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
505
499
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
506
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
507
500
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
508
501
; SM70-NEXT: shl.b32 %r14, %r13, 16;
509
502
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -518,8 +511,7 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
518
511
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
519
512
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
520
513
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
521
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
522
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
514
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
523
515
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
524
516
; SM70-NEXT: ret;
525
517
;
@@ -1724,8 +1716,8 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1724
1716
; SM70-LABEL: test_maxnum_v2(
1725
1717
; SM70: {
1726
1718
; SM70-NEXT: .reg .pred %p<3>;
1727
- ; SM70-NEXT: .reg .b16 %rs<13>;
1728
- ; SM70-NEXT: .reg .b32 %r<24>;
1719
+ ; SM70-NEXT: .reg .b16 %rs<9>;
1720
+ ; SM70-NEXT: .reg .b32 %r<25>;
1729
1721
; SM70-NEXT: .reg .f32 %f<7>;
1730
1722
; SM70-EMPTY:
1731
1723
; SM70-NEXT: // %bb.0:
@@ -1747,7 +1739,6 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1747
1739
; SM70-NEXT: setp.nan.f32 %p1, %f3, %f3;
1748
1740
; SM70-NEXT: or.b32 %r11, %r7, 4194304;
1749
1741
; SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
1750
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs7}, %r12; }
1751
1742
; SM70-NEXT: cvt.u32.u16 %r13, %rs1;
1752
1743
; SM70-NEXT: shl.b32 %r14, %r13, 16;
1753
1744
; SM70-NEXT: mov.b32 %f4, %r14;
@@ -1762,8 +1753,7 @@ define <2 x bfloat> @test_maxnum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
1762
1753
; SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
1763
1754
; SM70-NEXT: or.b32 %r21, %r17, 4194304;
1764
1755
; SM70-NEXT: selp.b32 %r22, %r21, %r20, %p2;
1765
- ; SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs11}, %r22; }
1766
- ; SM70-NEXT: mov.b32 %r23, {%rs11, %rs7};
1756
+ ; SM70-NEXT: prmt.b32 %r23, %r22, %r12, 0x7632U;
1767
1757
; SM70-NEXT: st.param.b32 [func_retval0], %r23;
1768
1758
; SM70-NEXT: ret;
1769
1759
;
0 commit comments