@@ -2018,74 +2018,73 @@ static const __device__ uint32_t iq3xxs_grid[256] = {
2018
2018
0x3e1c1c1c , 0x3e1c3404 , 0x3e24140c , 0x3e24240c , 0x3e2c0404 , 0x3e2c0414 , 0x3e2c1424 , 0x3e341c04 ,
2019
2019
};
2020
2020
2021
- static const __device__ uint32_t iq3xs_grid [512 ] = {
2022
- 0x04040404 , 0x0404040c , 0x04040414 , 0x0404042c , 0x0404043e , 0x04040c04 , 0x04040c0c , 0x04040c14 ,
2023
- 0x04040c24 , 0x04040c34 , 0x04041404 , 0x0404140c , 0x0404142c , 0x04041c1c , 0x04042404 , 0x04042414 ,
2024
- 0x0404242c , 0x0404243e , 0x04042c0c , 0x04042c1c , 0x04043404 , 0x04043414 , 0x04043e0c , 0x04043e24 ,
2025
- 0x04043e3e , 0x040c0404 , 0x040c040c , 0x040c0414 , 0x040c0424 , 0x040c0c04 , 0x040c0c0c , 0x040c0c2c ,
2026
- 0x040c1404 , 0x040c141c , 0x040c143e , 0x040c1c0c , 0x040c1c2c , 0x040c2424 , 0x040c340c , 0x040c342c ,
2027
- 0x040c3e14 , 0x04140404 , 0x0414040c , 0x0414042c , 0x0414043e , 0x04140c04 , 0x04140c1c , 0x04140c34 ,
2028
- 0x0414140c , 0x0414142c , 0x04141c04 , 0x04141c24 , 0x04142414 , 0x0414242c , 0x0414243e , 0x04142c0c ,
2029
- 0x04142c1c , 0x04143e04 , 0x04143e1c , 0x041c041c , 0x041c0c0c , 0x041c0c2c , 0x041c1404 , 0x041c1414 ,
2030
- 0x041c1c0c , 0x041c1c1c , 0x041c1c34 , 0x041c2424 , 0x041c2c04 , 0x041c2c14 , 0x041c343e , 0x041c3e0c ,
2031
- 0x041c3e2c , 0x04240404 , 0x04240c1c , 0x04240c3e , 0x0424140c , 0x04241424 , 0x04241c14 , 0x04242404 ,
2032
- 0x0424241c , 0x04242c0c , 0x04243e04 , 0x042c0414 , 0x042c0424 , 0x042c1404 , 0x042c1414 , 0x042c1434 ,
2033
- 0x042c1c1c , 0x042c240c , 0x042c242c , 0x042c243e , 0x042c3434 , 0x042c3e1c , 0x04340434 , 0x04340c0c ,
2034
- 0x04340c1c , 0x04341c0c , 0x04342c14 , 0x04343e0c , 0x043e0404 , 0x043e0414 , 0x043e0424 , 0x043e1404 ,
2035
- 0x043e1414 , 0x043e1434 , 0x043e1c1c , 0x043e2c04 , 0x043e2c24 , 0x0c040404 , 0x0c04040c , 0x0c040414 ,
2036
- 0x0c040424 , 0x0c040c04 , 0x0c040c0c , 0x0c040c1c , 0x0c040c2c , 0x0c040c3e , 0x0c041404 , 0x0c041414 ,
2037
- 0x0c041c0c , 0x0c041c24 , 0x0c041c34 , 0x0c042c24 , 0x0c042c34 , 0x0c04340c , 0x0c043e14 , 0x0c0c0404 ,
2038
- 0x0c0c040c , 0x0c0c041c , 0x0c0c0434 , 0x0c0c0c04 , 0x0c0c0c24 , 0x0c0c140c , 0x0c0c1c04 , 0x0c0c1c1c ,
2039
- 0x0c0c240c , 0x0c0c2c04 , 0x0c0c2c14 , 0x0c0c3e04 , 0x0c0c3e34 , 0x0c140404 , 0x0c140c14 , 0x0c140c2c ,
2040
- 0x0c140c3e , 0x0c141404 , 0x0c141424 , 0x0c141c14 , 0x0c142404 , 0x0c14241c , 0x0c142c2c , 0x0c143404 ,
2041
- 0x0c143e14 , 0x0c1c040c , 0x0c1c0424 , 0x0c1c043e , 0x0c1c0c04 , 0x0c1c0c1c , 0x0c1c140c , 0x0c1c143e ,
2042
- 0x0c1c1c04 , 0x0c1c1c24 , 0x0c1c240c , 0x0c1c3414 , 0x0c1c3e04 , 0x0c24041c , 0x0c24042c , 0x0c240c14 ,
2043
- 0x0c240c24 , 0x0c241c0c , 0x0c241c1c , 0x0c242414 , 0x0c242434 , 0x0c242c04 , 0x0c242c24 , 0x0c2c040c ,
2044
- 0x0c2c0c04 , 0x0c2c0c1c , 0x0c2c140c , 0x0c2c1c04 , 0x0c2c1c14 , 0x0c2c2c0c , 0x0c341404 , 0x0c341424 ,
2045
- 0x0c34143e , 0x0c342424 , 0x0c342434 , 0x0c3e040c , 0x0c3e041c , 0x0c3e0c04 , 0x0c3e0c14 , 0x0c3e140c ,
2046
- 0x0c3e1c2c , 0x0c3e240c , 0x0c3e3414 , 0x0c3e3e04 , 0x14040404 , 0x1404040c , 0x1404041c , 0x1404042c ,
2047
- 0x1404043e , 0x14040c04 , 0x14040c14 , 0x14040c24 , 0x14040c34 , 0x1404140c , 0x1404141c , 0x1404143e ,
2048
- 0x14041c04 , 0x14041c14 , 0x1404240c , 0x1404241c , 0x1404242c , 0x14042c04 , 0x14042c14 , 0x1404343e ,
2049
- 0x14043e04 , 0x14043e1c , 0x14043e2c , 0x140c0404 , 0x140c0414 , 0x140c0c04 , 0x140c0c1c , 0x140c0c3e ,
2050
- 0x140c1414 , 0x140c142c , 0x140c1c0c , 0x140c1c24 , 0x140c2414 , 0x140c2c0c , 0x1414040c , 0x14140424 ,
2051
- 0x1414043e , 0x1414140c , 0x1414141c , 0x14141c04 , 0x14141c3e , 0x1414240c , 0x14142c1c , 0x14142c3e ,
2052
- 0x14143e0c , 0x14143e24 , 0x141c0404 , 0x141c0414 , 0x141c042c , 0x141c0c0c , 0x141c1414 , 0x141c1424 ,
2053
- 0x141c1c0c , 0x141c1c1c , 0x141c2414 , 0x141c2c04 , 0x141c3434 , 0x1424040c , 0x1424043e , 0x14241404 ,
2054
- 0x1424141c , 0x14241c14 , 0x14241c2c , 0x1424240c , 0x14243e14 , 0x14243e2c , 0x142c0424 , 0x142c0c0c ,
2055
- 0x142c1414 , 0x142c1c3e , 0x142c2404 , 0x142c2c1c , 0x142c3e04 , 0x14340404 , 0x14340414 , 0x1434043e ,
2056
- 0x1434140c , 0x14342c2c , 0x1434340c , 0x143e042c , 0x143e0c0c , 0x143e1434 , 0x143e1c04 , 0x143e241c ,
2057
- 0x143e2c04 , 0x1c040414 , 0x1c040c0c , 0x1c040c1c , 0x1c040c2c , 0x1c040c3e , 0x1c041414 , 0x1c041c0c ,
2058
- 0x1c041c1c , 0x1c041c2c , 0x1c042414 , 0x1c042424 , 0x1c04243e , 0x1c042c0c , 0x1c04341c , 0x1c043e0c ,
2059
- 0x1c0c040c , 0x1c0c041c , 0x1c0c042c , 0x1c0c0c24 , 0x1c0c140c , 0x1c0c141c , 0x1c0c2404 , 0x1c0c3404 ,
2060
- 0x1c0c3e14 , 0x1c0c3e34 , 0x1c140404 , 0x1c140c14 , 0x1c141404 , 0x1c141c14 , 0x1c141c24 , 0x1c142c04 ,
2061
- 0x1c1c040c , 0x1c1c0c04 , 0x1c1c0c24 , 0x1c1c140c , 0x1c1c141c , 0x1c1c143e , 0x1c1c1c04 , 0x1c1c240c ,
2062
- 0x1c1c241c , 0x1c1c243e , 0x1c1c2c2c , 0x1c1c3e1c , 0x1c24041c , 0x1c240c0c , 0x1c240c34 , 0x1c241414 ,
2063
- 0x1c241c0c , 0x1c242c14 , 0x1c243404 , 0x1c243424 , 0x1c2c040c , 0x1c2c0c04 , 0x1c2c0c14 , 0x1c2c142c ,
2064
- 0x1c2c1c14 , 0x1c2c2424 , 0x1c2c2c34 , 0x1c2c3e1c , 0x1c340c34 , 0x1c34240c , 0x1c3e040c , 0x1c3e041c ,
2065
- 0x1c3e1404 , 0x1c3e1414 , 0x1c3e1c2c , 0x24040404 , 0x24040424 , 0x24040c14 , 0x24041404 , 0x24041424 ,
2066
- 0x2404143e , 0x24041c14 , 0x2404240c , 0x24042c04 , 0x24043e04 , 0x240c0414 , 0x240c043e , 0x240c0c0c ,
2067
- 0x240c0c1c , 0x240c1414 , 0x240c1c04 , 0x240c1c2c , 0x240c241c , 0x240c2c0c , 0x240c2c2c , 0x2414040c ,
2068
- 0x2414041c , 0x24140c04 , 0x24140c2c , 0x2414140c , 0x24141c1c , 0x24142404 , 0x24142c3e , 0x24143414 ,
2069
- 0x24143e04 , 0x241c0424 , 0x241c0c0c , 0x241c0c1c , 0x241c1404 , 0x241c1414 , 0x241c1c0c , 0x241c1c2c ,
2070
- 0x24240404 , 0x24240414 , 0x24241424 , 0x24241c3e , 0x24242404 , 0x24243e0c , 0x242c042c , 0x242c043e ,
2071
- 0x242c140c , 0x242c3414 , 0x24340c1c , 0x24341c24 , 0x24343404 , 0x243e0c04 , 0x243e0c2c , 0x243e1c04 ,
2072
- 0x243e241c , 0x243e2c0c , 0x2c040414 , 0x2c040c04 , 0x2c040c24 , 0x2c041414 , 0x2c042404 , 0x2c042424 ,
2073
- 0x2c04243e , 0x2c042c14 , 0x2c043434 , 0x2c043e24 , 0x2c0c040c , 0x2c0c041c , 0x2c0c042c , 0x2c0c0c14 ,
2074
- 0x2c0c140c , 0x2c0c1c14 , 0x2c0c3e14 , 0x2c140404 , 0x2c140c0c , 0x2c14141c , 0x2c141c04 , 0x2c141c34 ,
2075
- 0x2c142c1c , 0x2c1c0414 , 0x2c1c043e , 0x2c1c0c04 , 0x2c1c143e , 0x2c1c2424 , 0x2c1c2c0c , 0x2c1c342c ,
2076
- 0x2c1c3e1c , 0x2c24040c , 0x2c240424 , 0x2c241404 , 0x2c241c14 , 0x2c242434 , 0x2c2c0c14 , 0x2c2c1434 ,
2077
- 0x2c2c2c0c , 0x2c2c2c1c , 0x2c342414 , 0x2c3e0414 , 0x2c3e0424 , 0x2c3e1414 , 0x34040c0c , 0x34040c1c ,
2078
- 0x34040c2c , 0x34041c0c , 0x34041c1c , 0x34043404 , 0x340c0404 , 0x340c1404 , 0x340c143e , 0x340c3424 ,
2079
- 0x34140c14 , 0x34141c24 , 0x34142414 , 0x34142c2c , 0x34143414 , 0x34143e04 , 0x341c0404 , 0x341c0c24 ,
2080
- 0x341c140c , 0x341c2404 , 0x3424142c , 0x3424241c , 0x34243414 , 0x342c0404 , 0x342c041c , 0x342c1c24 ,
2081
- 0x342c3404 , 0x3434042c , 0x34342404 , 0x343e0c0c , 0x343e0c1c , 0x3e040404 , 0x3e040424 , 0x3e04043e ,
2082
- 0x3e041404 , 0x3e041414 , 0x3e041c34 , 0x3e042404 , 0x3e042c24 , 0x3e043414 , 0x3e0c0414 , 0x3e0c0c0c ,
2083
- 0x3e0c1424 , 0x3e0c241c , 0x3e0c242c , 0x3e14040c , 0x3e140424 , 0x3e140c04 , 0x3e140c34 , 0x3e14140c ,
2084
- 0x3e141c04 , 0x3e142c0c , 0x3e1c0414 , 0x3e1c1c14 , 0x3e1c1c2c , 0x3e1c2c1c , 0x3e24040c , 0x3e24042c ,
2085
- 0x3e240c1c , 0x3e241404 , 0x3e242c04 , 0x3e2c1414 , 0x3e2c2414 , 0x3e340414 , 0x3e341c0c , 0x3e3e0404 ,
2021
+ static const __device__ uint32_t iq3s_grid [512 ] = {
2022
+ 0x01010101 , 0x01010103 , 0x01010105 , 0x0101010b , 0x0101010f , 0x01010301 , 0x01010303 , 0x01010305 ,
2023
+ 0x01010309 , 0x0101030d , 0x01010501 , 0x01010503 , 0x0101050b , 0x01010707 , 0x01010901 , 0x01010905 ,
2024
+ 0x0101090b , 0x0101090f , 0x01010b03 , 0x01010b07 , 0x01010d01 , 0x01010d05 , 0x01010f03 , 0x01010f09 ,
2025
+ 0x01010f0f , 0x01030101 , 0x01030103 , 0x01030105 , 0x01030109 , 0x01030301 , 0x01030303 , 0x0103030b ,
2026
+ 0x01030501 , 0x01030507 , 0x0103050f , 0x01030703 , 0x0103070b , 0x01030909 , 0x01030d03 , 0x01030d0b ,
2027
+ 0x01030f05 , 0x01050101 , 0x01050103 , 0x0105010b , 0x0105010f , 0x01050301 , 0x01050307 , 0x0105030d ,
2028
+ 0x01050503 , 0x0105050b , 0x01050701 , 0x01050709 , 0x01050905 , 0x0105090b , 0x0105090f , 0x01050b03 ,
2029
+ 0x01050b07 , 0x01050f01 , 0x01050f07 , 0x01070107 , 0x01070303 , 0x0107030b , 0x01070501 , 0x01070505 ,
2030
+ 0x01070703 , 0x01070707 , 0x0107070d , 0x01070909 , 0x01070b01 , 0x01070b05 , 0x01070d0f , 0x01070f03 ,
2031
+ 0x01070f0b , 0x01090101 , 0x01090307 , 0x0109030f , 0x01090503 , 0x01090509 , 0x01090705 , 0x01090901 ,
2032
+ 0x01090907 , 0x01090b03 , 0x01090f01 , 0x010b0105 , 0x010b0109 , 0x010b0501 , 0x010b0505 , 0x010b050d ,
2033
+ 0x010b0707 , 0x010b0903 , 0x010b090b , 0x010b090f , 0x010b0d0d , 0x010b0f07 , 0x010d010d , 0x010d0303 ,
2034
+ 0x010d0307 , 0x010d0703 , 0x010d0b05 , 0x010d0f03 , 0x010f0101 , 0x010f0105 , 0x010f0109 , 0x010f0501 ,
2035
+ 0x010f0505 , 0x010f050d , 0x010f0707 , 0x010f0b01 , 0x010f0b09 , 0x03010101 , 0x03010103 , 0x03010105 ,
2036
+ 0x03010109 , 0x03010301 , 0x03010303 , 0x03010307 , 0x0301030b , 0x0301030f , 0x03010501 , 0x03010505 ,
2037
+ 0x03010703 , 0x03010709 , 0x0301070d , 0x03010b09 , 0x03010b0d , 0x03010d03 , 0x03010f05 , 0x03030101 ,
2038
+ 0x03030103 , 0x03030107 , 0x0303010d , 0x03030301 , 0x03030309 , 0x03030503 , 0x03030701 , 0x03030707 ,
2039
+ 0x03030903 , 0x03030b01 , 0x03030b05 , 0x03030f01 , 0x03030f0d , 0x03050101 , 0x03050305 , 0x0305030b ,
2040
+ 0x0305030f , 0x03050501 , 0x03050509 , 0x03050705 , 0x03050901 , 0x03050907 , 0x03050b0b , 0x03050d01 ,
2041
+ 0x03050f05 , 0x03070103 , 0x03070109 , 0x0307010f , 0x03070301 , 0x03070307 , 0x03070503 , 0x0307050f ,
2042
+ 0x03070701 , 0x03070709 , 0x03070903 , 0x03070d05 , 0x03070f01 , 0x03090107 , 0x0309010b , 0x03090305 ,
2043
+ 0x03090309 , 0x03090703 , 0x03090707 , 0x03090905 , 0x0309090d , 0x03090b01 , 0x03090b09 , 0x030b0103 ,
2044
+ 0x030b0301 , 0x030b0307 , 0x030b0503 , 0x030b0701 , 0x030b0705 , 0x030b0b03 , 0x030d0501 , 0x030d0509 ,
2045
+ 0x030d050f , 0x030d0909 , 0x030d090d , 0x030f0103 , 0x030f0107 , 0x030f0301 , 0x030f0305 , 0x030f0503 ,
2046
+ 0x030f070b , 0x030f0903 , 0x030f0d05 , 0x030f0f01 , 0x05010101 , 0x05010103 , 0x05010107 , 0x0501010b ,
2047
+ 0x0501010f , 0x05010301 , 0x05010305 , 0x05010309 , 0x0501030d , 0x05010503 , 0x05010507 , 0x0501050f ,
2048
+ 0x05010701 , 0x05010705 , 0x05010903 , 0x05010907 , 0x0501090b , 0x05010b01 , 0x05010b05 , 0x05010d0f ,
2049
+ 0x05010f01 , 0x05010f07 , 0x05010f0b , 0x05030101 , 0x05030105 , 0x05030301 , 0x05030307 , 0x0503030f ,
2050
+ 0x05030505 , 0x0503050b , 0x05030703 , 0x05030709 , 0x05030905 , 0x05030b03 , 0x05050103 , 0x05050109 ,
2051
+ 0x0505010f , 0x05050503 , 0x05050507 , 0x05050701 , 0x0505070f , 0x05050903 , 0x05050b07 , 0x05050b0f ,
2052
+ 0x05050f03 , 0x05050f09 , 0x05070101 , 0x05070105 , 0x0507010b , 0x05070303 , 0x05070505 , 0x05070509 ,
2053
+ 0x05070703 , 0x05070707 , 0x05070905 , 0x05070b01 , 0x05070d0d , 0x05090103 , 0x0509010f , 0x05090501 ,
2054
+ 0x05090507 , 0x05090705 , 0x0509070b , 0x05090903 , 0x05090f05 , 0x05090f0b , 0x050b0109 , 0x050b0303 ,
2055
+ 0x050b0505 , 0x050b070f , 0x050b0901 , 0x050b0b07 , 0x050b0f01 , 0x050d0101 , 0x050d0105 , 0x050d010f ,
2056
+ 0x050d0503 , 0x050d0b0b , 0x050d0d03 , 0x050f010b , 0x050f0303 , 0x050f050d , 0x050f0701 , 0x050f0907 ,
2057
+ 0x050f0b01 , 0x07010105 , 0x07010303 , 0x07010307 , 0x0701030b , 0x0701030f , 0x07010505 , 0x07010703 ,
2058
+ 0x07010707 , 0x0701070b , 0x07010905 , 0x07010909 , 0x0701090f , 0x07010b03 , 0x07010d07 , 0x07010f03 ,
2059
+ 0x07030103 , 0x07030107 , 0x0703010b , 0x07030309 , 0x07030503 , 0x07030507 , 0x07030901 , 0x07030d01 ,
2060
+ 0x07030f05 , 0x07030f0d , 0x07050101 , 0x07050305 , 0x07050501 , 0x07050705 , 0x07050709 , 0x07050b01 ,
2061
+ 0x07070103 , 0x07070301 , 0x07070309 , 0x07070503 , 0x07070507 , 0x0707050f , 0x07070701 , 0x07070903 ,
2062
+ 0x07070907 , 0x0707090f , 0x07070b0b , 0x07070f07 , 0x07090107 , 0x07090303 , 0x0709030d , 0x07090505 ,
2063
+ 0x07090703 , 0x07090b05 , 0x07090d01 , 0x07090d09 , 0x070b0103 , 0x070b0301 , 0x070b0305 , 0x070b050b ,
2064
+ 0x070b0705 , 0x070b0909 , 0x070b0b0d , 0x070b0f07 , 0x070d030d , 0x070d0903 , 0x070f0103 , 0x070f0107 ,
2065
+ 0x070f0501 , 0x070f0505 , 0x070f070b , 0x09010101 , 0x09010109 , 0x09010305 , 0x09010501 , 0x09010509 ,
2066
+ 0x0901050f , 0x09010705 , 0x09010903 , 0x09010b01 , 0x09010f01 , 0x09030105 , 0x0903010f , 0x09030303 ,
2067
+ 0x09030307 , 0x09030505 , 0x09030701 , 0x0903070b , 0x09030907 , 0x09030b03 , 0x09030b0b , 0x09050103 ,
2068
+ 0x09050107 , 0x09050301 , 0x0905030b , 0x09050503 , 0x09050707 , 0x09050901 , 0x09050b0f , 0x09050d05 ,
2069
+ 0x09050f01 , 0x09070109 , 0x09070303 , 0x09070307 , 0x09070501 , 0x09070505 , 0x09070703 , 0x0907070b ,
2070
+ 0x09090101 , 0x09090105 , 0x09090509 , 0x0909070f , 0x09090901 , 0x09090f03 , 0x090b010b , 0x090b010f ,
2071
+ 0x090b0503 , 0x090b0d05 , 0x090d0307 , 0x090d0709 , 0x090d0d01 , 0x090f0301 , 0x090f030b , 0x090f0701 ,
2072
+ 0x090f0907 , 0x090f0b03 , 0x0b010105 , 0x0b010301 , 0x0b010309 , 0x0b010505 , 0x0b010901 , 0x0b010909 ,
2073
+ 0x0b01090f , 0x0b010b05 , 0x0b010d0d , 0x0b010f09 , 0x0b030103 , 0x0b030107 , 0x0b03010b , 0x0b030305 ,
2074
+ 0x0b030503 , 0x0b030705 , 0x0b030f05 , 0x0b050101 , 0x0b050303 , 0x0b050507 , 0x0b050701 , 0x0b05070d ,
2075
+ 0x0b050b07 , 0x0b070105 , 0x0b07010f , 0x0b070301 , 0x0b07050f , 0x0b070909 , 0x0b070b03 , 0x0b070d0b ,
2076
+ 0x0b070f07 , 0x0b090103 , 0x0b090109 , 0x0b090501 , 0x0b090705 , 0x0b09090d , 0x0b0b0305 , 0x0b0b050d ,
2077
+ 0x0b0b0b03 , 0x0b0b0b07 , 0x0b0d0905 , 0x0b0f0105 , 0x0b0f0109 , 0x0b0f0505 , 0x0d010303 , 0x0d010307 ,
2078
+ 0x0d01030b , 0x0d010703 , 0x0d010707 , 0x0d010d01 , 0x0d030101 , 0x0d030501 , 0x0d03050f , 0x0d030d09 ,
2079
+ 0x0d050305 , 0x0d050709 , 0x0d050905 , 0x0d050b0b , 0x0d050d05 , 0x0d050f01 , 0x0d070101 , 0x0d070309 ,
2080
+ 0x0d070503 , 0x0d070901 , 0x0d09050b , 0x0d090907 , 0x0d090d05 , 0x0d0b0101 , 0x0d0b0107 , 0x0d0b0709 ,
2081
+ 0x0d0b0d01 , 0x0d0d010b , 0x0d0d0901 , 0x0d0f0303 , 0x0d0f0307 , 0x0f010101 , 0x0f010109 , 0x0f01010f ,
2082
+ 0x0f010501 , 0x0f010505 , 0x0f01070d , 0x0f010901 , 0x0f010b09 , 0x0f010d05 , 0x0f030105 , 0x0f030303 ,
2083
+ 0x0f030509 , 0x0f030907 , 0x0f03090b , 0x0f050103 , 0x0f050109 , 0x0f050301 , 0x0f05030d , 0x0f050503 ,
2084
+ 0x0f050701 , 0x0f050b03 , 0x0f070105 , 0x0f070705 , 0x0f07070b , 0x0f070b07 , 0x0f090103 , 0x0f09010b ,
2085
+ 0x0f090307 , 0x0f090501 , 0x0f090b01 , 0x0f0b0505 , 0x0f0b0905 , 0x0f0d0105 , 0x0f0d0703 , 0x0f0f0101 ,
2086
2086
};
2087
2087
2088
-
2089
2088
static const __device__ uint64_t iq1s_grid[512 ] = {
2090
2089
0xffffffffffff0101 , 0xffffffffff01ff00 , 0xffffffffff010100 , 0xffffffff00000000 ,
2091
2090
0xffffffff01ff00ff , 0xffffffff01ff0001 , 0xffffffff0101ffff , 0xffffffff0101ff01 ,
@@ -2392,9 +2391,9 @@ static __global__ void dequantize_block_iq3_s(const void * __restrict__ vx, dst_
2392
2391
const int ib = tid%8 ; // 0...7
2393
2392
dst_t * y = yy + i*QK_K + 32 *ib + 8 *il;
2394
2393
const uint8_t * qs = x[i].qs + 8 *ib;
2395
- const uint8_t * grid1 = (const uint8_t *)(iq3xs_grid + (qs[2 *il+0 ] | ((x[i].qh [ib] << (8 -2 *il)) & 256 )));
2396
- const uint8_t * grid2 = (const uint8_t *)(iq3xs_grid + (qs[2 *il+1 ] | ((x[i].qh [ib] << (7 -2 *il)) & 256 )));
2397
- const float d = (float )x[i].d * (0 . 5f + ((x[i].scales [ib/2 ] >> 4 *(ib%2 )) & 0xf )) * 0 . 5f ;
2394
+ const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2 *il+0 ] | ((x[i].qh [ib] << (8 -2 *il)) & 256 )));
2395
+ const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2 *il+1 ] | ((x[i].qh [ib] << (7 -2 *il)) & 256 )));
2396
+ const float d = (float )x[i].d * (1 + 2 * ((x[i].scales [ib/2 ] >> 4 *(ib%2 )) & 0xf ));
2398
2397
const uint8_t signs = x[i].signs [4 *ib + il];
2399
2398
for (int j = 0 ; j < 4 ; ++j) {
2400
2399
y[j+0 ] = d * grid1[j] * (signs & kmask_iq2xs[j+0 ] ? -1 .f : 1 .f );
@@ -5211,8 +5210,8 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
5211
5210
const int8_t * q8 = bq8_1[ib32].qs ;
5212
5211
int sumi = 0 ;
5213
5212
for (int l = 0 ; l < 4 ; ++l) {
5214
- const uint32_t * grid1 = iq3xs_grid + (qs[2 *l+0 ] | ((bq2->qh [ib32] << (8 - 2 *l)) & 256 ));
5215
- const uint32_t * grid2 = iq3xs_grid + (qs[2 *l+1 ] | ((bq2->qh [ib32] << (7 - 2 *l)) & 256 ));
5213
+ const uint32_t * grid1 = iq3s_grid + (qs[2 *l+0 ] | ((bq2->qh [ib32] << (8 - 2 *l)) & 256 ));
5214
+ const uint32_t * grid2 = iq3s_grid + (qs[2 *l+1 ] | ((bq2->qh [ib32] << (7 - 2 *l)) & 256 ));
5216
5215
uint32_t signs0 = __vcmpeq4 (((bq2->signs [4 *ib32+l] & 0xf ) * 0x01010101 ) & 0x08040201 , 0x08040201 );
5217
5216
uint32_t signs1 = __vcmpeq4 (((bq2->signs [4 *ib32+l] >> 4 ) * 0x01010101 ) & 0x08040201 , 0x08040201 );
5218
5217
const int grid_l = __vsub4 (grid1[0 ] ^ signs0, signs0);
@@ -5221,7 +5220,7 @@ static __device__ __forceinline__ float vec_dot_iq3_s_q8_1(
5221
5220
sumi = __dp4a (grid_h, *((int *)q8+1 ), sumi);
5222
5221
q8 += 8 ;
5223
5222
}
5224
- const float d = (float )bq2->d * (0 . 5f + ((bq2->scales [ib32/2 ] >> 4 *(ib32%2 )) & 0xf )) * __low2float (bq8_1[ib32].ds ) * 0 . 5f ;
5223
+ const float d = (float )bq2->d * (1 + 2 * ((bq2->scales [ib32/2 ] >> 4 *(ib32%2 )) & 0xf )) * __low2float (bq8_1[ib32].ds );
5225
5224
return d * sumi;
5226
5225
#else
5227
5226
assert (false );
0 commit comments