@@ -1905,8 +1905,8 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1905
1905
inst = createMadm (predicateFlagReg_m1, exsize, dst0, src0, src1, src2,
1906
1906
madmInstOpt);
1907
1907
1908
- // 0.5*y
1909
- // H0 = 0.5*y;
1908
+ // - 0.5*y
1909
+ // H0 = - 0.5*y;
1910
1910
// madm (4) r9.acc3 r0.noacc -r2(r8).noacc r7.acc2 {Align16, N1/N2}
1911
1911
dst0 = createDstRegRegion (tdst9);
1912
1912
dst0->setAccRegSel (ACC3);
@@ -1916,12 +1916,18 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1916
1916
src1->setAccRegSel (NOACC);
1917
1917
src2 = createSrcRegRegion (tsrc7);
1918
1918
src2->setAccRegSel (ACC2);
1919
- inst = createMadm (predicateFlagReg_m2, exsize, dst0, src0, src1, src2,
1919
+ G4_SrcRegRegion neg_srcRegion (*this , Mod_Minus, src1->getRegAccess (),
1920
+ src1->getBase (), src1->getRegOff (),
1921
+ src1->getSubRegOff (), src1->getRegion (),
1922
+ src1->getType ());
1923
+ neg_src1 = createSrcRegRegion (neg_srcRegion);
1924
+ neg_src1->setAccRegSel (src1->getAccRegSel ());
1925
+ inst = createMadm (predicateFlagReg_m2, exsize, dst0, src0, neg_src1, src2,
1920
1926
madmInstOpt);
1921
1927
1922
1928
// relative error; use double precision FMA
1923
- // eps = DP_FMA(H0, - S0, 0.5);
1924
- // eps = 0.5 - H0*S0
1929
+ // eps = DP_FMA(H0, S0, 0.5);
1930
+ // eps = 0.5 + H0*S0
1925
1931
// madm (4) r10.acc5 r2(r8).noacc r11.acc4 r9.acc3 {Align16, N1/N2}
1926
1932
dst0 = createDstRegRegion (tdst10);
1927
1933
dst0->setAccRegSel (ACC5);
@@ -1931,13 +1937,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1931
1937
src1->setAccRegSel (ACC4);
1932
1938
src2 = createSrcRegRegion (tsrc9);
1933
1939
src2->setAccRegSel (ACC3);
1934
- G4_SrcRegRegion neg_srcRegion1 (*this , Mod_Minus, src1->getRegAccess (),
1935
- src1->getBase (), src1->getRegOff (),
1936
- src1->getSubRegOff (), src1->getRegion (),
1937
- src1->getType ());
1938
- G4_SrcRegRegion *neg_src1 = createSrcRegRegion (neg_srcRegion1);
1939
- neg_src1->setAccRegSel (src1->getAccRegSel ());
1940
- inst = createMadm (predicateFlagReg_m3, exsize, dst0, src0, neg_src1, src2,
1940
+ inst = createMadm (predicateFlagReg_m3, exsize, dst0, src0, src1, src2,
1941
1941
madmInstOpt);
1942
1942
1943
1943
// refine approximation to ~46 bits
@@ -1975,7 +1975,7 @@ int IR_Builder::translateVISAArithmeticDoubleSQRTInst(
1975
1975
src0->getType ());
1976
1976
1977
1977
G4_SrcRegRegion *neg_src0 = createSrcRegRegion (neg_srcRegion0);
1978
- neg_src0->setAccRegSel (src0 ->getAccRegSel ());
1978
+ neg_src0->setAccRegSel (src1 ->getAccRegSel ());
1979
1979
inst = createMadm (predicateFlagReg_m5, exsize, dst0, neg_src0, src1, src2,
1980
1980
madmInstOpt);
1981
1981
0 commit comments