Skip to content

Commit 5722358

Browse files
fangliu2020 authored and igcbot committed
Revert changes of fast macro for double precision sqrt
Revert the changes to the fast macro for double-precision sqrt: (1) revert "[IGC vISA] Fix macro for fast double precision sqrt" (commit f3be4ec4d5faea26d6f71909eece341ec4c970ec); (2) revert "[IGC vISA] Fix the mme mismatch issue of fast macro for double precision" (commit c6643e9196c11377520470bc61059a0151b23394).
1 parent dd97032 commit 5722358

File tree

1 file changed

+13
-13
lines changed

1 file changed

+13
-13
lines changed

visa/VisaToG4/TranslateMath.cpp

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -1905,8 +1905,8 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
19051905
inst = createMadm(predicateFlagReg_m1, exsize, dst0, src0, src1, src2,
19061906
madmInstOpt);
19071907

1908-
// 0.5*y
1909-
// H0 = 0.5*y;
1908+
// -0.5*y
1909+
// H0 = -0.5*y;
19101910
// madm (4) r9.acc3 r0.noacc r2(r8).noacc r7.acc2 {Align16, N1/N2}
19111911
dst0 = createDstRegRegion(tdst9);
19121912
dst0->setAccRegSel(ACC3);
@@ -1916,12 +1916,18 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
19161916
src1->setAccRegSel(NOACC);
19171917
src2 = createSrcRegRegion(tsrc7);
19181918
src2->setAccRegSel(ACC2);
1919-
inst = createMadm(predicateFlagReg_m2, exsize, dst0, src0, src1, src2,
1919+
G4_SrcRegRegion neg_srcRegion(*this, Mod_Minus, src1->getRegAccess(),
1920+
src1->getBase(), src1->getRegOff(),
1921+
src1->getSubRegOff(), src1->getRegion(),
1922+
src1->getType());
1923+
neg_src1 = createSrcRegRegion(neg_srcRegion);
1924+
neg_src1->setAccRegSel(src1->getAccRegSel());
1925+
inst = createMadm(predicateFlagReg_m2, exsize, dst0, src0, neg_src1, src2,
19201926
madmInstOpt);
19211927

19221928
// relative error; use double precision FMA
1923-
// eps = DP_FMA(H0, -S0, 0.5);
1924-
// eps = 0.5 - H0*S0
1929+
// eps = DP_FMA(H0, S0, 0.5);
1930+
// eps = 0.5 + H0*S0
19251931
// madm (4) r10.acc5 r2(r8).noacc -r11.acc4 r9.acc3 {Align16, N1/N2}
19261932
dst0 = createDstRegRegion(tdst10);
19271933
dst0->setAccRegSel(ACC5);
@@ -1931,13 +1937,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
19311937
src1->setAccRegSel(ACC4);
19321938
src2 = createSrcRegRegion(tsrc9);
19331939
src2->setAccRegSel(ACC3);
1934-
G4_SrcRegRegion neg_srcRegion1(*this, Mod_Minus, src1->getRegAccess(),
1935-
src1->getBase(), src1->getRegOff(),
1936-
src1->getSubRegOff(), src1->getRegion(),
1937-
src1->getType());
1938-
G4_SrcRegRegion *neg_src1 = createSrcRegRegion(neg_srcRegion1);
1939-
neg_src1->setAccRegSel(src1->getAccRegSel());
1940-
inst = createMadm(predicateFlagReg_m3, exsize, dst0, src0, neg_src1, src2,
1940+
inst = createMadm(predicateFlagReg_m3, exsize, dst0, src0, src1, src2,
19411941
madmInstOpt);
19421942

19431943
// refine approximation to ~46 bits
@@ -1975,7 +1975,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
19751975
src0->getType());
19761976

19771977
G4_SrcRegRegion *neg_src0 = createSrcRegRegion(neg_srcRegion0);
1978-
neg_src0->setAccRegSel(src0->getAccRegSel());
1978+
neg_src0->setAccRegSel(src1->getAccRegSel());
19791979
inst = createMadm(predicateFlagReg_m5, exsize, dst0, neg_src0, src1, src2,
19801980
madmInstOpt);
19811981

0 commit comments

Comments
 (0)