@@ -1537,6 +1537,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1537
1537
G4_Declare *t0 = getImmDcl (createDFImm (0.0 ), element_size);
1538
1538
G4_Declare *t1 = getImmDcl (createDFImm (1.0 ), element_size);
1539
1539
G4_Declare *t2 = getImmDcl (createDFImm (0.5 ), element_size);
1540
+ G4_Declare* t3 = getImmDcl (createDFImm (1.5 ), element_size);
1540
1541
G4_Declare *t6 = createTempVarWithNoSpill (element_size, Type_DF, Any);
1541
1542
G4_Declare *t7 = createTempVarWithNoSpill (element_size, Type_DF, Any);
1542
1543
G4_Declare *t8 = createTempVarWithNoSpill (element_size, Type_DF, Any);
@@ -1567,7 +1568,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1567
1568
1568
1569
// constants
1569
1570
1570
- // r0 = 0.0:df, r1 = 1.0:df, r2(r8) = 0.5:df
1571
+ // r0 = 0.0:df, r1 = 1.0:df, r2(r8) = 0.5:df, r3 = 1.5:df
1571
1572
// NOTE: 'NoMask' is required as constants are required for splitting
1572
1573
// parts. Once they are in diverged branches, they won't be properly
1573
1574
// initialized without 'NoMask'.
@@ -1576,6 +1577,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1576
1577
G4_SrcRegRegion csrc0 (Mod_src_undef, Direct, t0->getRegVar (), 0 , 0 , srcRegionDesc, Type_DF);
1577
1578
G4_SrcRegRegion csrc1 (Mod_src_undef, Direct, t1->getRegVar (), 0 , 0 , srcRegionDesc, Type_DF);
1578
1579
G4_SrcRegRegion csrc2 (Mod_src_undef, Direct, t2->getRegVar (), 0 , 0 , srcRegionDesc, Type_DF);
1580
+ G4_SrcRegRegion csrc3 (Mod_src_undef, Direct, t3->getRegVar (), 0 , 0 , srcRegionDesc, Type_DF);
1579
1581
1580
1582
// each madm only handles 4 channel double data
1581
1583
VISA_EMask_Ctrl currEMask = emask;
@@ -1698,17 +1700,10 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1698
1700
neg_src1->setAccRegSel (src1->getAccRegSel ());
1699
1701
inst = createInst (NULL , G4_madm, NULL , false , exsize, dst0, src0, neg_src1, src2, madmInstOpt, line_no);
1700
1702
1701
- // madm (4) r8.acc6 r1.noacc r2(r8).noacc r1.noacc {Align16, N1/N2}
1702
- dst0 = createDstRegRegion (tdst8); dst0->setAccRegSel (ACC6);
1703
- src0 = createSrcRegRegion (csrc1); src0->setAccRegSel (NOACC);
1704
- src1 = createSrcRegRegion (csrc2); src1->setAccRegSel (NOACC);
1705
- src2 = createSrcRegRegion (csrc1); src2->setAccRegSel (NOACC);
1706
- inst = createInst (NULL , G4_madm, NULL , false , exsize, dst0, src0, src1, src2, madmInstOpt, line_no);
1707
-
1708
- // madm (4) r8.acc7 r1.noacc r8.acc6 r10.acc5 {Align16, N1/N2}
1703
+ // madm (4) r8.acc7 r1.noacc r3.noacc r10.acc5 {Align16, N1/N2}
1709
1704
dst0 = createDstRegRegion (tdst8); dst0->setAccRegSel (ACC7);
1710
1705
src0 = createSrcRegRegion (csrc1); src0->setAccRegSel (NOACC);
1711
- src1 = createSrcRegRegion (tsrc8 ); src1->setAccRegSel (ACC6 );
1706
+ src1 = createSrcRegRegion (csrc3 ); src1->setAccRegSel (NOACC );
1712
1707
src2 = createSrcRegRegion (tsrc10); src2->setAccRegSel (ACC5);
1713
1708
inst = createInst (NULL , G4_madm, NULL , false , exsize, dst0, src0, src1, src2, madmInstOpt, line_no);
1714
1709
0 commit comments