@@ -212,12 +212,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
212
212
; SM80-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
213
213
; SM80-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
214
214
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
215
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
215
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
216
216
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
217
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
217
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
218
218
; SM80-NEXT: add.rn.f32 %f3, %f2, %f1;
219
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
220
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
219
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
220
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
221
221
; SM80-NEXT: add.rn.f32 %f6, %f5, %f4;
222
222
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
223
223
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -233,12 +233,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
233
233
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
234
234
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
235
235
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
236
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
236
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
237
237
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
238
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
238
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
239
239
; SM80-FTZ-NEXT: add.rn.ftz.f32 %f3, %f2, %f1;
240
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
241
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
240
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
241
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
242
242
; SM80-FTZ-NEXT: add.rn.ftz.f32 %f6, %f5, %f4;
243
243
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
244
244
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -315,12 +315,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
315
315
; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
316
316
; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
317
317
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
318
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
318
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
319
319
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
320
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
320
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
321
321
; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
322
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
323
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
322
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
323
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
324
324
; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
325
325
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
326
326
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -336,12 +336,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
336
336
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
337
337
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
338
338
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
339
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
339
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
340
340
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
341
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
341
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
342
342
; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f3, %f2, %f1;
343
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
344
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
343
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
344
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
345
345
; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f6, %f5, %f4;
346
346
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
347
347
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -418,12 +418,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
418
418
; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
419
419
; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
420
420
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
421
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
421
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
422
422
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
423
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
423
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
424
424
; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
425
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
426
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
425
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
426
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
427
427
; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
428
428
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
429
429
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -439,12 +439,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
439
439
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
440
440
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
441
441
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
442
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
442
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
443
443
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
444
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
444
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
445
445
; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f3, %f2, %f1;
446
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
447
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
446
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
447
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
448
448
; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f6, %f5, %f4;
449
449
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
450
450
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -521,12 +521,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
521
521
; SM80-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
522
522
; SM80-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
523
523
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
524
- ; SM80-NEXT: cvt.f32.bf16 %f1, %rs2 ;
524
+ ; SM80-NEXT: cvt.f32.bf16 %f1, %rs1 ;
525
525
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
526
- ; SM80-NEXT: cvt.f32.bf16 %f2, %rs4 ;
526
+ ; SM80-NEXT: cvt.f32.bf16 %f2, %rs3 ;
527
527
; SM80-NEXT: div.rn.f32 %f3, %f2, %f1;
528
- ; SM80-NEXT: cvt.f32.bf16 %f4, %rs1 ;
529
- ; SM80-NEXT: cvt.f32.bf16 %f5, %rs3 ;
528
+ ; SM80-NEXT: cvt.f32.bf16 %f4, %rs2 ;
529
+ ; SM80-NEXT: cvt.f32.bf16 %f5, %rs4 ;
530
530
; SM80-NEXT: div.rn.f32 %f6, %f5, %f4;
531
531
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
532
532
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -542,12 +542,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
542
542
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
543
543
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
544
544
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
545
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2 ;
545
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1 ;
546
546
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
547
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4 ;
547
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3 ;
548
548
; SM80-FTZ-NEXT: div.rn.ftz.f32 %f3, %f2, %f1;
549
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1 ;
550
- ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3 ;
549
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2 ;
550
+ ; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4 ;
551
551
; SM80-FTZ-NEXT: div.rn.ftz.f32 %f6, %f5, %f4;
552
552
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
553
553
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -563,12 +563,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
563
563
; SM90-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
564
564
; SM90-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
565
565
; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r2;
566
- ; SM90-NEXT: cvt.f32.bf16 %f1, %rs2 ;
566
+ ; SM90-NEXT: cvt.f32.bf16 %f1, %rs1 ;
567
567
; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r1;
568
- ; SM90-NEXT: cvt.f32.bf16 %f2, %rs4 ;
568
+ ; SM90-NEXT: cvt.f32.bf16 %f2, %rs3 ;
569
569
; SM90-NEXT: div.rn.f32 %f3, %f2, %f1;
570
- ; SM90-NEXT: cvt.f32.bf16 %f4, %rs1 ;
571
- ; SM90-NEXT: cvt.f32.bf16 %f5, %rs3 ;
570
+ ; SM90-NEXT: cvt.f32.bf16 %f4, %rs2 ;
571
+ ; SM90-NEXT: cvt.f32.bf16 %f5, %rs4 ;
572
572
; SM90-NEXT: div.rn.f32 %f6, %f5, %f4;
573
573
; SM90-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
574
574
; SM90-NEXT: st.param.b32 [func_retval0], %r3;
0 commit comments