Skip to content

Commit 00fe344

Browse files
committed
fixup - flip operand order
1 parent 2880f55 commit 00fe344

File tree

4 files changed, with 40 additions (+40) and 41 deletions (-41).

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -733,12 +733,12 @@ def fpround_oneuse : PatFrag<(ops node:$a), (fpround node:$a), [{
733733

734734
def : Pat<(v2bf16 (build_vector (bf16 (fpround_oneuse Float32Regs:$a)),
735735
(bf16 (fpround_oneuse Float32Regs:$b)))),
736-
(CVT_bf16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>,
736+
(CVT_bf16x2_f32 Float32Regs:$b, Float32Regs:$a, CvtRN)>,
737737
Requires<[hasPTX<70>, hasSM<80>, hasBF16Math]>;
738738

739739
def : Pat<(v2f16 (build_vector (f16 (fpround_oneuse Float32Regs:$a)),
740740
(f16 (fpround_oneuse Float32Regs:$b)))),
741-
(CVT_f16x2_f32 Float32Regs:$a, Float32Regs:$b, CvtRN)>,
741+
(CVT_f16x2_f32 Float32Regs:$b, Float32Regs:$a, CvtRN)>,
742742
Requires<[hasPTX<70>, hasSM<80>, useFP16Math]>;
743743

744744
//-----------------------------------

llvm/test/CodeGen/NVPTX/bf16-instructions.ll

Lines changed: 36 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -212,12 +212,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
212212
; SM80-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
213213
; SM80-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
214214
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
215-
; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
215+
; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
216216
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
217-
; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
217+
; SM80-NEXT: cvt.f32.bf16 %f2, %rs3;
218218
; SM80-NEXT: add.rn.f32 %f3, %f2, %f1;
219-
; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
220-
; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
219+
; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
220+
; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
221221
; SM80-NEXT: add.rn.f32 %f6, %f5, %f4;
222222
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
223223
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -233,12 +233,12 @@ define <2 x bfloat> @test_faddx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
233233
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_faddx2_param_0];
234234
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_faddx2_param_1];
235235
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
236-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2;
236+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
237237
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
238-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4;
238+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3;
239239
; SM80-FTZ-NEXT: add.rn.ftz.f32 %f3, %f2, %f1;
240-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1;
241-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3;
240+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2;
241+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4;
242242
; SM80-FTZ-NEXT: add.rn.ftz.f32 %f6, %f5, %f4;
243243
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
244244
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -315,12 +315,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
315315
; SM80-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
316316
; SM80-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
317317
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
318-
; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
318+
; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
319319
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
320-
; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
320+
; SM80-NEXT: cvt.f32.bf16 %f2, %rs3;
321321
; SM80-NEXT: sub.rn.f32 %f3, %f2, %f1;
322-
; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
323-
; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
322+
; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
323+
; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
324324
; SM80-NEXT: sub.rn.f32 %f6, %f5, %f4;
325325
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
326326
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -336,12 +336,12 @@ define <2 x bfloat> @test_fsubx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
336336
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fsubx2_param_0];
337337
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fsubx2_param_1];
338338
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
339-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2;
339+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
340340
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
341-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4;
341+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3;
342342
; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f3, %f2, %f1;
343-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1;
344-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3;
343+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2;
344+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4;
345345
; SM80-FTZ-NEXT: sub.rn.ftz.f32 %f6, %f5, %f4;
346346
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
347347
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -418,12 +418,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
418418
; SM80-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
419419
; SM80-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
420420
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
421-
; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
421+
; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
422422
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
423-
; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
423+
; SM80-NEXT: cvt.f32.bf16 %f2, %rs3;
424424
; SM80-NEXT: mul.rn.f32 %f3, %f2, %f1;
425-
; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
426-
; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
425+
; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
426+
; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
427427
; SM80-NEXT: mul.rn.f32 %f6, %f5, %f4;
428428
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
429429
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -439,12 +439,12 @@ define <2 x bfloat> @test_fmulx2(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
439439
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fmulx2_param_0];
440440
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fmulx2_param_1];
441441
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
442-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2;
442+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
443443
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
444-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4;
444+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3;
445445
; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f3, %f2, %f1;
446-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1;
447-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3;
446+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2;
447+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4;
448448
; SM80-FTZ-NEXT: mul.rn.ftz.f32 %f6, %f5, %f4;
449449
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
450450
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -521,12 +521,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
521521
; SM80-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
522522
; SM80-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
523523
; SM80-NEXT: mov.b32 {%rs1, %rs2}, %r2;
524-
; SM80-NEXT: cvt.f32.bf16 %f1, %rs2;
524+
; SM80-NEXT: cvt.f32.bf16 %f1, %rs1;
525525
; SM80-NEXT: mov.b32 {%rs3, %rs4}, %r1;
526-
; SM80-NEXT: cvt.f32.bf16 %f2, %rs4;
526+
; SM80-NEXT: cvt.f32.bf16 %f2, %rs3;
527527
; SM80-NEXT: div.rn.f32 %f3, %f2, %f1;
528-
; SM80-NEXT: cvt.f32.bf16 %f4, %rs1;
529-
; SM80-NEXT: cvt.f32.bf16 %f5, %rs3;
528+
; SM80-NEXT: cvt.f32.bf16 %f4, %rs2;
529+
; SM80-NEXT: cvt.f32.bf16 %f5, %rs4;
530530
; SM80-NEXT: div.rn.f32 %f6, %f5, %f4;
531531
; SM80-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
532532
; SM80-NEXT: st.param.b32 [func_retval0], %r3;
@@ -542,12 +542,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
542542
; SM80-FTZ-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
543543
; SM80-FTZ-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
544544
; SM80-FTZ-NEXT: mov.b32 {%rs1, %rs2}, %r2;
545-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs2;
545+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f1, %rs1;
546546
; SM80-FTZ-NEXT: mov.b32 {%rs3, %rs4}, %r1;
547-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs4;
547+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f2, %rs3;
548548
; SM80-FTZ-NEXT: div.rn.ftz.f32 %f3, %f2, %f1;
549-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs1;
550-
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs3;
549+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f4, %rs2;
550+
; SM80-FTZ-NEXT: cvt.ftz.f32.bf16 %f5, %rs4;
551551
; SM80-FTZ-NEXT: div.rn.ftz.f32 %f6, %f5, %f4;
552552
; SM80-FTZ-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
553553
; SM80-FTZ-NEXT: st.param.b32 [func_retval0], %r3;
@@ -563,12 +563,12 @@ define <2 x bfloat> @test_fdiv(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
563563
; SM90-NEXT: ld.param.b32 %r1, [test_fdiv_param_0];
564564
; SM90-NEXT: ld.param.b32 %r2, [test_fdiv_param_1];
565565
; SM90-NEXT: mov.b32 {%rs1, %rs2}, %r2;
566-
; SM90-NEXT: cvt.f32.bf16 %f1, %rs2;
566+
; SM90-NEXT: cvt.f32.bf16 %f1, %rs1;
567567
; SM90-NEXT: mov.b32 {%rs3, %rs4}, %r1;
568-
; SM90-NEXT: cvt.f32.bf16 %f2, %rs4;
568+
; SM90-NEXT: cvt.f32.bf16 %f2, %rs3;
569569
; SM90-NEXT: div.rn.f32 %f3, %f2, %f1;
570-
; SM90-NEXT: cvt.f32.bf16 %f4, %rs1;
571-
; SM90-NEXT: cvt.f32.bf16 %f5, %rs3;
570+
; SM90-NEXT: cvt.f32.bf16 %f4, %rs2;
571+
; SM90-NEXT: cvt.f32.bf16 %f5, %rs4;
572572
; SM90-NEXT: div.rn.f32 %f6, %f5, %f4;
573573
; SM90-NEXT: cvt.rn.bf16x2.f32 %r3, %f6, %f3;
574574
; SM90-NEXT: st.param.b32 [func_retval0], %r3;

llvm/test/CodeGen/NVPTX/bf16x2-instructions-approx.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
77
declare <2 x bfloat> @llvm.sin.f16(<2 x bfloat> %a) #0
88
declare <2 x bfloat> @llvm.cos.f16(<2 x bfloat> %a) #0
99

10-
1110
define <2 x bfloat> @test_sin(<2 x bfloat> %a) #0 #1 {
1211
; CHECK-LABEL: test_sin(
1312
; CHECK: {

llvm/test/CodeGen/NVPTX/convert-sm80.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -233,7 +233,7 @@ define <2 x bfloat> @fold_ff2bf16x2(float %a, float %b) {
233233
; CHECK-NEXT: // %bb.0:
234234
; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2bf16x2_param_0];
235235
; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2bf16x2_param_1];
236-
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f1, %f2;
236+
; CHECK-NEXT: cvt.rn.bf16x2.f32 %r1, %f2, %f1;
237237
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
238238
; CHECK-NEXT: ret;
239239
%ah = fptrunc float %a to bfloat
@@ -252,7 +252,7 @@ define <2 x half> @fold_ff2f16x2(float %a, float %b) {
252252
; CHECK-NEXT: // %bb.0:
253253
; CHECK-NEXT: ld.param.f32 %f1, [fold_ff2f16x2_param_0];
254254
; CHECK-NEXT: ld.param.f32 %f2, [fold_ff2f16x2_param_1];
255-
; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f1, %f2;
255+
; CHECK-NEXT: cvt.rn.f16x2.f32 %r1, %f2, %f1;
256256
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
257257
; CHECK-NEXT: ret;
258258
%ah = fptrunc float %a to half

0 commit comments

Comments
 (0)