Skip to content

Commit 212cba0

Browse files
[X86] Correct the cdisp8 encoding for VSCATTER/VGATHER prefetch (#122051)
During differential fuzzing, I found 8 more instructions whose disp8 offset multipliers differ from binutils. I am fairly sure there is a bug in the X86 LLVM disp8 offset multipliers for this subset of vector scatter and gather prefetch instructions. Please check, and refer to the previous pull request: #120340.

These vector scatter and gather prefetch instructions also have an unusual k mask operand position, but I have not addressed that in this patch because I am unsure how to change the Intel format in the TableGen file.

In every example below the encoded disp8 byte is 0x01, so the decoded displacement equals the multiplier itself: binutils scales by 8 for the QWORD (`*pd`) forms and by 4 for the DWORD (`*ps`) forms, whereas LLVM had the two swapped for the `*dpd` and `*qps` variants.

```
hex: 62 f2 fd 49 c6 4c 51 01
llvm: vgatherpf0dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vgatherpf0dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vgatherpf0dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex: 62 f2 7d 49 c7 4c 51 01
llvm: vgatherpf0qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vgatherpf0qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vgatherpf0qps DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex: 62 f2 fd 49 c6 54 51 01
llvm: vgatherpf1dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vgatherpf1dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vgatherpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex: 62 f2 7d 49 c7 54 51 01
llvm: vgatherpf1qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vgatherpf1qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vgatherpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex: 62 f2 fd 49 c6 6c 51 01
llvm: vscatterpf0dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vscatterpf0dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vscatterpf0dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex: 62 f2 7d 49 c7 6c 51 01
llvm: vscatterpf0qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vscatterpf0qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vscatterpf0qps DWORD PTR [rcx+zmm2*2+0x4]{k1}

hex: 62 f2 fd 49 c6 74 51 01
llvm: vscatterpf1dpd {k1}, zmmword ptr [rcx + 2*ymm2 + 4]
ours: vscatterpf1dpd qword ptr * 8 [rcx + 2*ymm2 + 8] {k1}
gnu: vscatterpf1dpd QWORD PTR [rcx+ymm2*2+0x8]{k1}

hex: 62 f2 7d 49 c7 74 51 01
llvm: vscatterpf1qps {k1}, ymmword ptr [rcx + 2*zmm2 + 8]
ours: vscatterpf1qps dword ptr * 8 [rcx + 2*zmm2 + 4] {k1}
gnu: vscatterpf1qps DWORD PTR [rcx+zmm2*2+0x4]{k1}
```
1 parent 26d513d commit 212cba0

File tree

2 files changed

+72
-8
lines changed

2 files changed

+72
-8
lines changed

llvm/lib/Target/X86/X86InstrAVX512.td

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10388,10 +10388,10 @@ defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
1038810388
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1038910389

1039010390
defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
10391-
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10391+
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1039210392

1039310393
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
10394-
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10394+
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
1039510395

1039610396
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
1039710397
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10400,10 +10400,10 @@ defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
1040010400
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1040110401

1040210402
defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
10403-
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10403+
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1040410404

1040510405
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
10406-
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10406+
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
1040710407

1040810408
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
1040910409
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10412,10 +10412,10 @@ defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps
1041210412
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1041310413

1041410414
defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps",
10415-
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10415+
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1041610416

1041710417
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
10418-
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10418+
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
1041910419

1042010420
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
1042110421
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
@@ -10424,10 +10424,10 @@ defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps
1042410424
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1042510425

1042610426
defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps",
10427-
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
10427+
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
1042810428

1042910429
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
10430-
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
10430+
VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
1043110431

1043210432
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
1043310433
VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;

llvm/test/MC/X86/avx512pf-64-att.s

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,3 +63,67 @@ vscatterpf0qpd (%r14,%zmm14){%k7}
6363
// CHECK: vscatterpf1qpd (%r15,%zmm13) {%k1}
6464
// CHECK: encoding: [0x62,0x92,0xfd,0x49,0xc7,0x34,0x2f]
6565
vscatterpf1qpd (%r15,%zmm13){%k1}
66+
67+
// CHECK: vgatherpf0dpd 8(%rcx,%ymm2,2) {%k1}
68+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x4c,0x51,0x01]
69+
vgatherpf0dpd 8(%rcx,%ymm2,2){%k1}
70+
71+
// CHECK: vgatherpf0dps 4(%rcx,%zmm2,2) {%k1}
72+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x4c,0x51,0x01]
73+
vgatherpf0dps 4(%rcx,%zmm2,2){%k1}
74+
75+
// CHECK: vgatherpf0qpd 8(%rcx,%zmm2,2) {%k1}
76+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x4c,0x51,0x01]
77+
vgatherpf0qpd 8(%rcx,%zmm2,2){%k1}
78+
79+
// CHECK: vgatherpf0qps 4(%rcx,%zmm2,2) {%k1}
80+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x4c,0x51,0x01]
81+
vgatherpf0qps 4(%rcx,%zmm2,2){%k1}
82+
83+
// CHECK: vgatherpf1dpd 8(%rcx,%ymm2,2) {%k1}
84+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x54,0x51,0x01]
85+
vgatherpf1dpd 8(%rcx,%ymm2,2){%k1}
86+
87+
// CHECK: vgatherpf1dps 4(%rcx,%zmm2,2) {%k1}
88+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x54,0x51,0x01]
89+
vgatherpf1dps 4(%rcx,%zmm2,2){%k1}
90+
91+
// CHECK: vgatherpf1qpd 8(%rcx,%zmm2,2) {%k1}
92+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x54,0x51,0x01]
93+
vgatherpf1qpd 8(%rcx,%zmm2,2){%k1}
94+
95+
// CHECK: vgatherpf1qps 4(%rcx,%zmm2,2) {%k1}
96+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x54,0x51,0x01]
97+
vgatherpf1qps 4(%rcx,%zmm2,2){%k1}
98+
99+
// CHECK: vscatterpf0dpd 8(%rcx,%ymm2,2) {%k1}
100+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x6c,0x51,0x01]
101+
vscatterpf0dpd 8(%rcx,%ymm2,2){%k1}
102+
103+
// CHECK: vscatterpf0dps 4(%rcx,%zmm2,2) {%k1}
104+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x6c,0x51,0x01]
105+
vscatterpf0dps 4(%rcx,%zmm2,2){%k1}
106+
107+
// CHECK: vscatterpf0qpd 8(%rcx,%zmm2,2) {%k1}
108+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x6c,0x51,0x01]
109+
vscatterpf0qpd 8(%rcx,%zmm2,2){%k1}
110+
111+
// CHECK: vscatterpf0qps 4(%rcx,%zmm2,2) {%k1}
112+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x6c,0x51,0x01]
113+
vscatterpf0qps 4(%rcx,%zmm2,2){%k1}
114+
115+
// CHECK: vscatterpf1dpd 8(%rcx,%ymm2,2) {%k1}
116+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc6,0x74,0x51,0x01]
117+
vscatterpf1dpd 8(%rcx,%ymm2,2){%k1}
118+
119+
// CHECK: vscatterpf1dps 4(%rcx,%zmm2,2) {%k1}
120+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc6,0x74,0x51,0x01]
121+
vscatterpf1dps 4(%rcx,%zmm2,2){%k1}
122+
123+
// CHECK: vscatterpf1qpd 8(%rcx,%zmm2,2) {%k1}
124+
// CHECK: encoding: [0x62,0xf2,0xfd,0x49,0xc7,0x74,0x51,0x01]
125+
vscatterpf1qpd 8(%rcx,%zmm2,2){%k1}
126+
127+
// CHECK: vscatterpf1qps 4(%rcx,%zmm2,2) {%k1}
128+
// CHECK: encoding: [0x62,0xf2,0x7d,0x49,0xc7,0x74,0x51,0x01]
129+
vscatterpf1qps 4(%rcx,%zmm2,2){%k1}

0 commit comments

Comments
 (0)