Skip to content

Commit 79a6425

Browse files
grey-eminence, sys_zuul
authored and committed
Optimize double precision SQRT instruction.
Change-Id: I68ddfc05c517421590c84f481da408ea86d0eb55
1 parent e153430 commit 79a6425

File tree

1 file changed

+5
-10
lines changed

1 file changed

+5
-10
lines changed

visa/TranslationInterface.cpp

Lines changed: 5 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1537,6 +1537,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
15371537
G4_Declare *t0 = getImmDcl(createDFImm(0.0), element_size);
15381538
G4_Declare *t1 = getImmDcl(createDFImm(1.0), element_size);
15391539
G4_Declare *t2 = getImmDcl(createDFImm(0.5), element_size);
1540+
G4_Declare* t3 = getImmDcl(createDFImm(1.5), element_size);
15401541
G4_Declare *t6 = createTempVarWithNoSpill(element_size, Type_DF, Any);
15411542
G4_Declare *t7 = createTempVarWithNoSpill(element_size, Type_DF, Any);
15421543
G4_Declare *t8 = createTempVarWithNoSpill(element_size, Type_DF, Any);
@@ -1567,7 +1568,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
15671568

15681569
// constants
15691570

1570-
// r0 = 0.0:df, r1 = 1.0:df, r2(r8) = 0.5:df
1571+
// r0 = 0.0:df, r1 = 1.0:df, r2(r8) = 0.5:df, r3 = 1.5:df
15711572
// NOTE: 'NoMask' is required as constants are required for splitting
15721573
// parts. Once they are in diverged branches, it won't be properly
15731574
// initialized without 'NoMask'.
@@ -1576,6 +1577,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
15761577
G4_SrcRegRegion csrc0(Mod_src_undef, Direct, t0->getRegVar(), 0, 0, srcRegionDesc, Type_DF);
15771578
G4_SrcRegRegion csrc1(Mod_src_undef, Direct, t1->getRegVar(), 0, 0, srcRegionDesc, Type_DF);
15781579
G4_SrcRegRegion csrc2(Mod_src_undef, Direct, t2->getRegVar(), 0, 0, srcRegionDesc, Type_DF);
1580+
G4_SrcRegRegion csrc3(Mod_src_undef, Direct, t3->getRegVar(), 0, 0, srcRegionDesc, Type_DF);
15791581

15801582
// each madm only handles 4 channel double data
15811583
VISA_EMask_Ctrl currEMask = emask;
@@ -1698,17 +1700,10 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
16981700
neg_src1->setAccRegSel(src1->getAccRegSel());
16991701
inst = createInst(NULL, G4_madm, NULL, false, exsize, dst0, src0, neg_src1, src2, madmInstOpt, line_no);
17001702

1701-
// madm (4) r8.acc6 r1.noacc r2(r8).noacc r1.noacc {Align16, N1/N2}
1702-
dst0 = createDstRegRegion(tdst8); dst0->setAccRegSel(ACC6);
1703-
src0 = createSrcRegRegion(csrc1); src0->setAccRegSel(NOACC);
1704-
src1 = createSrcRegRegion(csrc2); src1->setAccRegSel(NOACC);
1705-
src2 = createSrcRegRegion(csrc1); src2->setAccRegSel(NOACC);
1706-
inst = createInst(NULL, G4_madm, NULL, false, exsize, dst0, src0, src1, src2, madmInstOpt, line_no);
1707-
1708-
// madm (4) r8.acc7 r1.noacc r8.acc6 r10.acc5 {Align16, N1/N2}
1703+
// madm (4) r8.acc7 r1.noacc r3.noacc r10.acc5 {Align16, N1/N2}
17091704
dst0 = createDstRegRegion(tdst8); dst0->setAccRegSel(ACC7);
17101705
src0 = createSrcRegRegion(csrc1); src0->setAccRegSel(NOACC);
1711-
src1 = createSrcRegRegion(tsrc8); src1->setAccRegSel(ACC6);
1706+
src1 = createSrcRegRegion(csrc3); src1->setAccRegSel(NOACC);
17121707
src2 = createSrcRegRegion(tsrc10); src2->setAccRegSel(ACC5);
17131708
inst = createInst(NULL, G4_madm, NULL, false, exsize, dst0, src0, src1, src2, madmInstOpt, line_no);
17141709

0 commit comments

Comments
 (0)