@@ -22,20 +22,18 @@ define <2 x bfloat> @test_ret_const() #0 {
22
22
define <2 x bfloat> @test_fadd_imm_0 (<2 x bfloat> %a ) #0 {
23
23
; SM80-LABEL: test_fadd_imm_0(
24
24
; SM80: {
25
- ; SM80-NEXT: .reg .b16 %rs<5 >;
25
+ ; SM80-NEXT: .reg .b16 %rs<3 >;
26
26
; SM80-NEXT: .reg .b32 %r<3>;
27
27
; SM80-NEXT: .reg .f32 %f<5>;
28
28
; SM80-EMPTY:
29
29
; SM80-NEXT: // %bb.0:
30
30
; SM80-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0];
31
31
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
32
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
33
- ; SM80-NEXT: add.rn.f32 %f2, %f1, 0f40000000;
34
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
35
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
36
- ; SM80-NEXT: add.rn.f32 %f4, %f3, 0f3F800000;
37
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
38
- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
32
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
33
+ ; SM80-NEXT: add.rn.f32 %f2, %f1, 0f3F800000;
34
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
35
+ ; SM80-NEXT: add.rn.f32 %f4, %f3, 0f40000000;
36
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
39
37
; SM80-NEXT: st.param.b32 [func_retval0], %r2;
40
38
; SM80-NEXT: ret;
41
39
;
@@ -84,24 +82,22 @@ define bfloat @test_fadd_imm_1(bfloat %a) #0 {
84
82
define <2 x bfloat> @test_fsubx2 (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
85
83
; SM80-LABEL: test_fsubx2(
86
84
; SM80: {
87
- ; SM80-NEXT: .reg .b16 %rs<7 >;
85
+ ; SM80-NEXT: .reg .b16 %rs<5 >;
88
86
; SM80-NEXT: .reg .b32 %r<4>;
89
87
; SM80-NEXT: .reg .f32 %f<7>;
90
88
; SM80-EMPTY:
91
89
; SM80-NEXT: // %bb.0:
92
90
; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
93
91
; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
94
92
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
95
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
93
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
96
94
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
97
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
95
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
98
96
; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
99
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
100
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
101
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
97
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
98
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
102
99
; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
103
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
104
- ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
100
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
105
101
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
106
102
; SM80-NEXT: ret;
107
103
;
@@ -122,24 +118,22 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
122
118
define <2 x bfloat> @test_fmulx2 (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
123
119
; SM80-LABEL: test_fmulx2(
124
120
; SM80: {
125
- ; SM80-NEXT: .reg .b16 %rs<7 >;
121
+ ; SM80-NEXT: .reg .b16 %rs<5 >;
126
122
; SM80-NEXT: .reg .b32 %r<4>;
127
123
; SM80-NEXT: .reg .f32 %f<7>;
128
124
; SM80-EMPTY:
129
125
; SM80-NEXT: // %bb.0:
130
126
; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
131
127
; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
132
128
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
133
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
129
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
134
130
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
135
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
131
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
136
132
; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
137
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
138
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
139
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
133
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
134
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
140
135
; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
141
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
142
- ; SM80-NEXT: mov.b32 %r3, {%rs6, %rs5};
136
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
143
137
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
144
138
; SM80-NEXT: ret;
145
139
;
@@ -160,24 +154,22 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
160
154
define <2 x bfloat> @test_fdiv (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
161
155
; CHECK-LABEL: test_fdiv(
162
156
; CHECK: {
163
- ; CHECK-NEXT: .reg .b16 %rs<7 >;
157
+ ; CHECK-NEXT: .reg .b16 %rs<5 >;
164
158
; CHECK-NEXT: .reg .b32 %r<4>;
165
159
; CHECK-NEXT: .reg .f32 %f<7>;
166
160
; CHECK-EMPTY:
167
161
; CHECK-NEXT: // %bb.0:
168
162
; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
169
163
; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
170
164
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2;
171
- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
165
+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
172
166
; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1;
173
- ; CHECK-NEXT: cvt.f32.bf16 %f2, %rs4 ;
167
+ ; CHECK-NEXT: cvt.f32.bf16 %f2, %rs3 ;
174
168
; CHECK-NEXT: div.rn.f32 %f3, %f2, %f1;
175
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs5, %f3;
176
- ; CHECK-NEXT: cvt.f32.bf16 %f4, %rs1;
177
- ; CHECK-NEXT: cvt.f32.bf16 %f5, %rs3;
169
+ ; CHECK-NEXT: cvt.f32.bf16 %f4, %rs2;
170
+ ; CHECK-NEXT: cvt.f32.bf16 %f5, %rs4;
178
171
; CHECK-NEXT: div.rn.f32 %f6, %f5, %f4;
179
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs6, %f6;
180
- ; CHECK-NEXT: mov.b32 %r3, {%rs6, %rs5};
172
+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
181
173
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
182
174
; CHECK-NEXT: ret;
183
175
%r = fdiv <2 x bfloat> %a , %b
@@ -418,15 +410,12 @@ define <2 x bfloat> @test_select_cc_bf16_f32(<2 x bfloat> %a, <2 x bfloat> %b,
418
410
define <2 x bfloat> @test_fptrunc_2xfloat (<2 x float > %a ) #0 {
419
411
; CHECK-LABEL: test_fptrunc_2xfloat(
420
412
; CHECK: {
421
- ; CHECK-NEXT: .reg .b16 %rs<3>;
422
413
; CHECK-NEXT: .reg .b32 %r<2>;
423
414
; CHECK-NEXT: .reg .f32 %f<3>;
424
415
; CHECK-EMPTY:
425
416
; CHECK-NEXT: // %bb.0:
426
417
; CHECK-NEXT: ld.param.v2.f32 {%f1, %f2}, [test_fptrunc_2xfloat_param_0];
427
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs1, %f2;
428
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs2, %f1;
429
- ; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
418
+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f2, %f1;
430
419
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
431
420
; CHECK-NEXT: ret;
432
421
%r = fptrunc <2 x float > %a to <2 x bfloat>
@@ -503,20 +492,18 @@ declare <2 x bfloat> @llvm.fmuladd.f16(<2 x bfloat> %a, <2 x bfloat> %b, <2 x bf
503
492
define <2 x bfloat> @test_sqrt (<2 x bfloat> %a ) #0 {
504
493
; CHECK-LABEL: test_sqrt(
505
494
; CHECK: {
506
- ; CHECK-NEXT: .reg .b16 %rs<5 >;
495
+ ; CHECK-NEXT: .reg .b16 %rs<3 >;
507
496
; CHECK-NEXT: .reg .b32 %r<3>;
508
497
; CHECK-NEXT: .reg .f32 %f<5>;
509
498
; CHECK-EMPTY:
510
499
; CHECK-NEXT: // %bb.0:
511
500
; CHECK-NEXT: ld.param.b32 %r1, [test_sqrt_param_0];
512
501
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
513
- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
502
+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
514
503
; CHECK-NEXT: sqrt.rn.f32 %f2, %f1;
515
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
516
- ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs1;
504
+ ; CHECK-NEXT: cvt.f32.bf16 %f3, %rs2;
517
505
; CHECK-NEXT: sqrt.rn.f32 %f4, %f3;
518
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
519
- ; CHECK-NEXT: mov.b32 %r2, {%rs4, %rs3};
506
+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
520
507
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
521
508
; CHECK-NEXT: ret;
522
509
%r = call <2 x bfloat> @llvm.sqrt.f16 (<2 x bfloat> %a )
@@ -556,33 +543,29 @@ define <2 x bfloat> @test_fabs(<2 x bfloat> %a) #0 {
556
543
define <2 x bfloat> @test_fabs_add (<2 x bfloat> %a , <2 x bfloat> %b ) #0 {
557
544
; SM80-LABEL: test_fabs_add(
558
545
; SM80: {
559
- ; SM80-NEXT: .reg .b16 %rs<11 >;
546
+ ; SM80-NEXT: .reg .b16 %rs<7 >;
560
547
; SM80-NEXT: .reg .b32 %r<6>;
561
548
; SM80-NEXT: .reg .f32 %f<11>;
562
549
; SM80-EMPTY:
563
550
; SM80-NEXT: // %bb.0:
564
551
; SM80-NEXT: ld.param.b32 %r1, [test_fabs_add_param_1];
565
552
; SM80-NEXT: ld.param.b32 %r2, [test_fabs_add_param_0];
566
553
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
567
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
554
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
568
555
; SM80-NEXT: add.rn.f32 %f2, %f1, %f1;
569
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
570
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
556
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
571
557
; SM80-NEXT: add.rn.f32 %f4, %f3, %f3;
572
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
573
- ; SM80-NEXT: mov.b32 %r3, {%rs4, %rs3};
558
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f4, %f2;
574
559
; SM80-NEXT: abs.bf16x2 %r4, %r3;
575
- ; SM80-NEXT: mov.b32 {%rs5 , %rs6 }, %r4;
576
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs6 ;
577
- ; SM80-NEXT: mov.b32 {%rs7 , %rs8 }, %r1;
578
- ; SM80-NEXT: cvt.f32.bf16 %f6, %rs8 ;
560
+ ; SM80-NEXT: mov.b32 {%rs3 , %rs4 }, %r4;
561
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
562
+ ; SM80-NEXT: mov.b32 {%rs5 , %rs6 }, %r1;
563
+ ; SM80-NEXT: cvt.f32.bf16 %f6, %rs5 ;
579
564
; SM80-NEXT: add.rn.f32 %f7, %f5, %f6;
580
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs9, %f7;
581
- ; SM80-NEXT: cvt.f32.bf16 %f8, %rs5;
582
- ; SM80-NEXT: cvt.f32.bf16 %f9, %rs7;
565
+ ; SM80-NEXT: cvt.f32.bf16 %f8, %rs4;
566
+ ; SM80-NEXT: cvt.f32.bf16 %f9, %rs6;
583
567
; SM80-NEXT: add.rn.f32 %f10, %f8, %f9;
584
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs10, %f10;
585
- ; SM80-NEXT: mov.b32 %r5, {%rs10, %rs9};
568
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r5, %f10, %f7;
586
569
; SM80-NEXT: st.param.b32 [func_retval0], %r5;
587
570
; SM80-NEXT: ret;
588
571
;
@@ -637,20 +620,18 @@ define <2 x bfloat> @test_maxnum(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
637
620
define <2 x bfloat> @test_floor (<2 x bfloat> %a ) #0 {
638
621
; SM80-LABEL: test_floor(
639
622
; SM80: {
640
- ; SM80-NEXT: .reg .b16 %rs<5 >;
623
+ ; SM80-NEXT: .reg .b16 %rs<3 >;
641
624
; SM80-NEXT: .reg .b32 %r<3>;
642
625
; SM80-NEXT: .reg .f32 %f<5>;
643
626
; SM80-EMPTY:
644
627
; SM80-NEXT: // %bb.0:
645
628
; SM80-NEXT: ld.param.b32 %r1, [test_floor_param_0];
646
629
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
647
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
630
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
648
631
; SM80-NEXT: cvt.rmi.f32.f32 %f2, %f1;
649
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
650
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
632
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
651
633
; SM80-NEXT: cvt.rmi.f32.f32 %f4, %f3;
652
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
653
- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
634
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
654
635
; SM80-NEXT: st.param.b32 [func_retval0], %r2;
655
636
; SM80-NEXT: ret;
656
637
;
@@ -674,20 +655,18 @@ define <2 x bfloat> @test_floor(<2 x bfloat> %a) #0 {
674
655
define <2 x bfloat> @test_ceil (<2 x bfloat> %a ) #0 {
675
656
; SM80-LABEL: test_ceil(
676
657
; SM80: {
677
- ; SM80-NEXT: .reg .b16 %rs<5 >;
658
+ ; SM80-NEXT: .reg .b16 %rs<3 >;
678
659
; SM80-NEXT: .reg .b32 %r<3>;
679
660
; SM80-NEXT: .reg .f32 %f<5>;
680
661
; SM80-EMPTY:
681
662
; SM80-NEXT: // %bb.0:
682
663
; SM80-NEXT: ld.param.b32 %r1, [test_ceil_param_0];
683
664
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
684
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
665
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
685
666
; SM80-NEXT: cvt.rpi.f32.f32 %f2, %f1;
686
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
687
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
667
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
688
668
; SM80-NEXT: cvt.rpi.f32.f32 %f4, %f3;
689
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
690
- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
669
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
691
670
; SM80-NEXT: st.param.b32 [func_retval0], %r2;
692
671
; SM80-NEXT: ret;
693
672
;
@@ -711,20 +690,18 @@ define <2 x bfloat> @test_ceil(<2 x bfloat> %a) #0 {
711
690
define <2 x bfloat> @test_trunc (<2 x bfloat> %a ) #0 {
712
691
; SM80-LABEL: test_trunc(
713
692
; SM80: {
714
- ; SM80-NEXT: .reg .b16 %rs<5 >;
693
+ ; SM80-NEXT: .reg .b16 %rs<3 >;
715
694
; SM80-NEXT: .reg .b32 %r<3>;
716
695
; SM80-NEXT: .reg .f32 %f<5>;
717
696
; SM80-EMPTY:
718
697
; SM80-NEXT: // %bb.0:
719
698
; SM80-NEXT: ld.param.b32 %r1, [test_trunc_param_0];
720
699
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
721
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
700
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
722
701
; SM80-NEXT: cvt.rzi.f32.f32 %f2, %f1;
723
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
724
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
702
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
725
703
; SM80-NEXT: cvt.rzi.f32.f32 %f4, %f3;
726
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
727
- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
704
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
728
705
; SM80-NEXT: st.param.b32 [func_retval0], %r2;
729
706
; SM80-NEXT: ret;
730
707
;
@@ -748,20 +725,18 @@ define <2 x bfloat> @test_trunc(<2 x bfloat> %a) #0 {
748
725
define <2 x bfloat> @test_rint (<2 x bfloat> %a ) #0 {
749
726
; SM80-LABEL: test_rint(
750
727
; SM80: {
751
- ; SM80-NEXT: .reg .b16 %rs<5 >;
728
+ ; SM80-NEXT: .reg .b16 %rs<3 >;
752
729
; SM80-NEXT: .reg .b32 %r<3>;
753
730
; SM80-NEXT: .reg .f32 %f<5>;
754
731
; SM80-EMPTY:
755
732
; SM80-NEXT: // %bb.0:
756
733
; SM80-NEXT: ld.param.b32 %r1, [test_rint_param_0];
757
734
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r1;
758
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
735
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
759
736
; SM80-NEXT: cvt.rni.f32.f32 %f2, %f1;
760
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs3, %f2;
761
- ; SM80-NEXT: cvt.f32.bf16 %f3, %rs1;
737
+ ; SM80-NEXT: cvt.f32.bf16 %f3, %rs2;
762
738
; SM80-NEXT: cvt.rni.f32.f32 %f4, %f3;
763
- ; SM80-NEXT: cvt.rn.bf16.f32 %rs4, %f4;
764
- ; SM80-NEXT: mov.b32 %r2, {%rs4, %rs3};
739
+ ; SM80-NEXT: cvt.rn.bf16x2.f32 %r2, %f4, %f2;
765
740
; SM80-NEXT: st.param.b32 [func_retval0], %r2;
766
741
; SM80-NEXT: ret;
767
742
;
@@ -786,14 +761,14 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
786
761
; CHECK-LABEL: test_round(
787
762
; CHECK: {
788
763
; CHECK-NEXT: .reg .pred %p<5>;
789
- ; CHECK-NEXT: .reg .b16 %rs<5 >;
764
+ ; CHECK-NEXT: .reg .b16 %rs<3 >;
790
765
; CHECK-NEXT: .reg .b32 %r<9>;
791
766
; CHECK-NEXT: .reg .f32 %f<17>;
792
767
; CHECK-EMPTY:
793
768
; CHECK-NEXT: // %bb.0:
794
769
; CHECK-NEXT: ld.param.b32 %r1, [test_round_param_0];
795
770
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
796
- ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs2 ;
771
+ ; CHECK-NEXT: cvt.f32.bf16 %f1, %rs1 ;
797
772
; CHECK-NEXT: mov.b32 %r2, %f1;
798
773
; CHECK-NEXT: and.b32 %r3, %r2, -2147483648;
799
774
; CHECK-NEXT: or.b32 %r4, %r3, 1056964608;
@@ -806,8 +781,7 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
806
781
; CHECK-NEXT: cvt.rzi.f32.f32 %f7, %f1;
807
782
; CHECK-NEXT: setp.lt.f32 %p2, %f5, 0f3F000000;
808
783
; CHECK-NEXT: selp.f32 %f8, %f7, %f6, %p2;
809
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs3, %f8;
810
- ; CHECK-NEXT: cvt.f32.bf16 %f9, %rs1;
784
+ ; CHECK-NEXT: cvt.f32.bf16 %f9, %rs2;
811
785
; CHECK-NEXT: mov.b32 %r5, %f9;
812
786
; CHECK-NEXT: and.b32 %r6, %r5, -2147483648;
813
787
; CHECK-NEXT: or.b32 %r7, %r6, 1056964608;
@@ -820,8 +794,7 @@ define <2 x bfloat> @test_round(<2 x bfloat> %a) #0 {
820
794
; CHECK-NEXT: cvt.rzi.f32.f32 %f15, %f9;
821
795
; CHECK-NEXT: setp.lt.f32 %p4, %f13, 0f3F000000;
822
796
; CHECK-NEXT: selp.f32 %f16, %f15, %f14, %p4;
823
- ; CHECK-NEXT: cvt.rn.bf16.f32 %rs4, %f16;
824
- ; CHECK-NEXT: mov.b32 %r8, {%rs4, %rs3};
797
+ ; CHECK-NEXT: cvt.rn.bf16x2.f32 %r8, %f16, %f8;
825
798
; CHECK-NEXT: st.param.b32 [func_retval0], %r8;
826
799
; CHECK-NEXT: ret;
827
800
%r = call <2 x bfloat> @llvm.round.f16 (<2 x bfloat> %a )
0 commit comments