
Commit 2c4a53a

[X86][NFC] Simplify the definitions of double precision shift instructions
1 parent dca6f60 commit 2c4a53a
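
Context for readers (not part of the commit message): the "double precision" shifts SHLD and SHRD shift a destination register while filling the vacated bits from a second register, which is exactly the funnel-shift operation that the fshl/fshr (and target-specific X86fshl/X86fshr) selection patterns in the diff below refer to. A minimal C++ sketch of the 32-bit semantics, assuming the documented masking of the count to 5 bits:

#include <cstdint>
#include <cstdio>

// shld dst, src, cnt  ==  fshl(dst, src, cnt): the result keeps the high bits
// of dst and pulls the vacated low bits from the top of src.
uint32_t shld32(uint32_t dst, uint32_t src, unsigned cnt) {
  cnt &= 31;                                  // hardware masks the count mod 32
  if (cnt == 0) return dst;
  return (dst << cnt) | (src >> (32 - cnt));
}

// shrd dst, src, cnt  ==  fshr(src, dst, cnt): the result keeps the low bits
// of dst and pulls the vacated high bits from the bottom of src.
uint32_t shrd32(uint32_t dst, uint32_t src, unsigned cnt) {
  cnt &= 31;
  if (cnt == 0) return dst;
  return (dst >> cnt) | (src << (32 - cnt));
}

int main() {
  std::printf("%08x\n", shld32(0xAABBCCDDu, 0x11223344u, 8)); // bbccdd11
  std::printf("%08x\n", shrd32(0xAABBCCDDu, 0x11223344u, 8)); // 44aabbcc
}

This operand order is why the register forms tie $src1 to $dst, and why the shrd patterns swap $src1 and $src2 relative to the shld ones.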

File tree

2 files changed, +97 -178 lines changed


llvm/lib/Target/X86/X86InstrShiftRotate.td

Lines changed: 93 additions & 176 deletions
@@ -68,164 +68,6 @@ defm ROR: ShiftRotate<"ror", MRM1r, MRM1m, rotr, WriteRotateCL, WriteRotate, Wri
 defm RCL: ShiftRotate<"rcl", MRM2r, MRM2m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
 defm RCR: ShiftRotate<"rcr", MRM3r, MRM3m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
 
-//===----------------------------------------------------------------------===//
-// Double shift instructions (generalizations of rotate)
-//===----------------------------------------------------------------------===//
-
-let Defs = [EFLAGS], hasSideEffects = 0 in {
-let Constraints = "$src1 = $dst" in {
-
-let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
-def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
-                   (ins GR16:$src1, GR16:$src2),
-                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
-                   TB, OpSize16;
-def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
-                   (ins GR16:$src1, GR16:$src2),
-                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
-                   TB, OpSize16;
-def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
-                   (ins GR32:$src1, GR32:$src2),
-                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
-                   TB, OpSize32;
-def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
-                   (ins GR32:$src1, GR32:$src2),
-                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
-                   TB, OpSize32;
-def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
-                    (ins GR64:$src1, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
-                    TB;
-def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
-                    (ins GR64:$src1, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
-                    TB;
-} // Uses, SchedRW
-
-let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other.
-def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR16:$dst),
-                     (ins GR16:$src1, GR16:$src2, u8imm:$src3),
-                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
-                                       (i8 imm:$src3)))]>,
-                     TB, OpSize16;
-def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR16:$dst),
-                     (ins GR16:$src1, GR16:$src2, u8imm:$src3),
-                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1,
-                                       (i8 imm:$src3)))]>,
-                     TB, OpSize16;
-def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
-                     (outs GR32:$dst),
-                     (ins GR32:$src1, GR32:$src2, u8imm:$src3),
-                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2,
-                                       (i8 imm:$src3)))]>,
-                     TB, OpSize32;
-def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
-                     (outs GR32:$dst),
-                     (ins GR32:$src1, GR32:$src2, u8imm:$src3),
-                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1,
-                                       (i8 imm:$src3)))]>,
-                     TB, OpSize32;
-def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
-                      (outs GR64:$dst),
-                      (ins GR64:$src1, GR64:$src2, u8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2,
-                                        (i8 imm:$src3)))]>,
-                      TB;
-def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
-                      (outs GR64:$dst),
-                      (ins GR64:$src1, GR64:$src2, u8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1,
-                                        (i8 imm:$src3)))]>,
-                      TB;
-} // SchedRW
-} // Constraints = "$src = $dst"
-
-let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
-def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL),
-                           addr:$dst)]>, TB, OpSize16;
-def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
-                   "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL),
-                           addr:$dst)]>, TB, OpSize16;
-
-def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL),
-                           addr:$dst)]>, TB, OpSize32;
-def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
-                   "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                   [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL),
-                           addr:$dst)]>, TB, OpSize32;
-
-def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL),
-                            addr:$dst)]>, TB;
-def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
-                    "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
-                    [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL),
-                            addr:$dst)]>, TB;
-} // Uses, SchedRW
-
-let SchedRW = [WriteSHDmri] in {
-def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
-                     (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
-                     "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(store (X86fshl (loadi16 addr:$dst), GR16:$src2,
-                                      (i8 imm:$src3)), addr:$dst)]>,
-                     TB, OpSize16;
-def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
-                     (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
-                     "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(store (X86fshr GR16:$src2, (loadi16 addr:$dst),
-                                      (i8 imm:$src3)), addr:$dst)]>,
-                     TB, OpSize16;
-
-def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
-                     (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
-                     "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(store (fshl (loadi32 addr:$dst), GR32:$src2,
-                                   (i8 imm:$src3)), addr:$dst)]>,
-                     TB, OpSize32;
-def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
-                     (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
-                     "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                     [(store (fshr GR32:$src2, (loadi32 addr:$dst),
-                                   (i8 imm:$src3)), addr:$dst)]>,
-                     TB, OpSize32;
-
-def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
-                      (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
-                      "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (fshl (loadi64 addr:$dst), GR64:$src2,
-                                    (i8 imm:$src3)), addr:$dst)]>,
-                      TB;
-def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
-                      (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
-                      "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                      [(store (fshr GR64:$src2, (loadi64 addr:$dst),
-                                    (i8 imm:$src3)), addr:$dst)]>,
-                      TB;
-} // SchedRW
-
-} // Defs = [EFLAGS], hasSideEffects
-
 // Use the opposite rotate if allows us to use the rotate by 1 instruction.
 def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
 def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
@@ -254,6 +96,95 @@ def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
 def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
           (ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
 
+
+// Patterns for rotate with relocImm for the immediate field.
+def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
+          (ROL8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
+          (ROL16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
+          (ROL32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
+          (ROL64ri GR64:$src1, relocImm:$src2)>;
+
+def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
+          (ROR8ri GR8:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
+          (ROR16ri GR16:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
+          (ROR32ri GR32:$src1, relocImm:$src2)>;
+def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
+          (ROR64ri GR64:$src1, relocImm:$src2)>;
+
+//===----------------------------------------------------------------------===//
+// Double precision shift instructions (generalizations of rotate)
+//===----------------------------------------------------------------------===//
+
+class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
+        (ins t.RegClass:$src1, t.RegClass:$src2, u8imm:$src3), m, triop_args,
+        []>, NDD<0, TB> {
+  let isCommutable = 1;
+  let ImmT = Imm8;
+  let SchedRW = [WriteSHDrri];
+  let Pattern = !if(!eq(m, "shld"),
+                    [(set t.RegClass:$dst, (node t.RegClass:$src1, t.RegClass:$src2, (i8 imm:$src3)))],
+                    [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, (i8 imm:$src3)))]);
+}
+
+class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpRR<o, m, triop_cl_args, t, (outs t.RegClass:$dst), []>, NDD<0, TB> {
+  let Uses = [CL];
+  let SchedRW = [WriteSHDrrcl];
+  let Pattern = !if(!eq(m, "shld"),
+                    [(set t.RegClass:$dst, (node t.RegClass:$src1, t.RegClass:$src2, CL))],
+                    [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, CL))]);
+}
+
+class ShlrdOpMRI8U_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2, u8imm:$src3),
+        m, triop_args, []>, TB {
+  let ImmT = Imm8;
+  let SchedRW = [WriteSHDmri];
+  let mayLoad = 1;
+  let mayStore = 1;
+  let Pattern = !if(!eq(m, "shld"),
+                    [(store (node (t.LoadNode addr:$src1), t.RegClass:$src2, (i8 imm:$src3)), addr:$src1)],
+                    [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), (i8 imm:$src3)), addr:$src1)]);
+}
+
+class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node>
+  : BinOpMR<o, m, triop_cl_args, t, (outs), []>, TB {
+  let Uses = [CL];
+  let SchedRW = [WriteSHDmrcl];
+  let mayStore = 1;
+  let Pattern = !if(!eq(m, "shld"),
+                    [(store (node (t.LoadNode addr:$src1), t.RegClass:$src2, CL), addr:$src1)],
+                    [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), CL), addr:$src1)]);
+}
+
+multiclass Shlrd<bits<8> o1, bits<8> o2, string m, SDPatternOperator node, SDPatternOperator t_node> {
+
+  def 16rri8 : ShlrdOpRRI8U_R<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16;
+  def 32rri8 : ShlrdOpRRI8U_R<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
+  def 64rri8 : ShlrdOpRRI8U_R<o1, m, Xi64, node>, DefEFLAGS;
+
+  def 16rrCL : ShlrdOpRRC_R<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16;
+  def 32rrCL : ShlrdOpRRC_R<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
+  def 64rrCL : ShlrdOpRRC_R<o2, m, Xi64, node>, DefEFLAGS;
+
+  def 16mri8 : ShlrdOpMRI8U_M<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16;
+  def 32mri8 : ShlrdOpMRI8U_M<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
+  def 64mri8 : ShlrdOpMRI8U_M<o1, m, Xi64, node>, DefEFLAGS;
+
+  def 16mrCL : ShlrdOpMRC_M<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16;
+  def 32mrCL : ShlrdOpMRC_M<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
+  def 64mrCL : ShlrdOpMRC_M<o2, m, Xi64, node>, DefEFLAGS;
+}
+
+defm SHLD : Shlrd<0xA4, 0xA5, "shld", fshl, X86fshl>;
+defm SHRD : Shlrd<0xAC, 0xAD, "shrd", fshr, X86fshr>;
+
 // Sandy Bridge and newer Intel processors support faster rotates using
 // SHLD to avoid a partial flag update on the normal rotate instructions.
 // Use a pseudo so that TwoInstructionPass and register allocation will see
@@ -276,6 +207,10 @@ let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
     [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
 }
 
+//===----------------------------------------------------------------------===//
+// BMI Shift/Rotate instructions
+//===----------------------------------------------------------------------===//
+
 def ROT32L2R_imm8 : SDNodeXForm<imm, [{
   // Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
   return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
@@ -446,21 +381,3 @@ let Predicates = [HasBMI2] in {
            (INSERT_SUBREG
              (i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
 }
-
-def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
-          (ROL8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
-          (ROL16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
-          (ROL32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
-          (ROL64ri GR64:$src1, relocImm:$src2)>;
-
-def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
-          (ROR8ri GR8:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
-          (ROR16ri GR16:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
-          (ROR32ri GR32:$src1, relocImm:$src2)>;
-def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
-          (ROR64ri GR64:$src1, relocImm:$src2)>;
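
A note on the new multiclass (my reading of the diff, not the author's wording): defm SHLD / defm SHRD expand to the same def names as the deleted block (SHLD16rri8 through SHRD64mrCL), which is what keeps the change NFC, and the 16-bit instantiations take the target-specific t_node (X86fshl/X86fshr) while the 32/64-bit ones take the generic fshl/fshr, exactly as before. The likely reason for the split: the generic funnel-shift node reduces the shift amount modulo the bit width, whereas 16-bit SHLD/SHRD mask the count mod 32 and leave counts above 15 undefined, so a generic i16 funnel shift cannot be matched to them directly. A small C++ sketch of the generic i16 semantics, for comparison with the 32-bit sketch near the top:

#include <cstdint>

// Generic fshl on i16: every count is well defined because it is reduced
// modulo 16. (16-bit SHLD, by contrast, is documented to produce an
// undefined result for counts in 16..31 -- hence the target-specific
// X86fshl/X86fshr nodes for the 16-bit defs, as I understand it.)
uint16_t fshl16(uint16_t a, uint16_t b, unsigned cnt) {
  cnt %= 16;
  if (cnt == 0) return a;
  return static_cast<uint16_t>((a << cnt) | (b >> (16 - cnt)));
}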

llvm/lib/Target/X86/X86InstrUtils.td

Lines changed: 4 additions & 2 deletions
@@ -100,15 +100,17 @@ defvar unaryop_ndd_args = "{$src1, $dst|$dst, $src1}";
 defvar binop_args = "{$src2, $src1|$src1, $src2}";
 defvar binop_ndd_args = "{$src2, $src1, $dst|$dst, $src1, $src2}";
 defvar binop_cl_args = "{%cl, $src1|$src1, cl}";
+defvar triop_args = "{$src3, $src2, $src1|$src1, $src2, $src3}";
+defvar triop_cl_args = "{%cl, $src2, $src1|$src1, $src2, cl}";
 defvar tie_dst_src1 = "$src1 = $dst";
 
 // NDD - Helper for new data destination instructions
-class NDD<bit ndd> {
+class NDD<bit ndd, Map map = OB> {
   string Constraints = !if(!eq(ndd, 0), tie_dst_src1, "");
   Encoding OpEnc = !if(!eq(ndd, 0), EncNormal, EncEVEX);
   bit hasEVEX_B = ndd;
   bit hasVEX_4V = ndd;
-  Map OpMap = !if(!eq(ndd, 0), OB, T_MAP4);
+  Map OpMap = !if(!eq(ndd, 0), map, T_MAP4);
 }
 // NF - Helper for NF (no flags update) instructions
 class NF: T_MAP4, EVEX, EVEX_NF, NoCD8;
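
The NDD tweak is what lets the new register-form classes write NDD<0, TB>: the added "Map map = OB" parameter only affects the non-NDD (legacy-encoding) case, so existing NDD<0> and NDD<1> users keep their previous OpMap while SHLD/SHRD can select the two-byte 0F opcode map. A rough C++ analogue of the !if selection, with hypothetical enumerators standing in for the TableGen Map records:

// Hypothetical stand-ins for the TableGen Map records; names are illustrative.
enum class OpcodeMap { OB, TB, T_MAP4 };

// Mirrors: Map OpMap = !if(!eq(ndd, 0), map, T_MAP4);
// The defaulted parameter keeps every existing user (map == OB) unchanged.
constexpr OpcodeMap opMap(bool ndd, OpcodeMap map = OpcodeMap::OB) {
  return ndd ? OpcodeMap::T_MAP4 : map;
}

static_assert(opMap(false) == OpcodeMap::OB);                 // old NDD<0>
static_assert(opMap(true)  == OpcodeMap::T_MAP4);             // old NDD<1>
static_assert(opMap(false, OpcodeMap::TB) == OpcodeMap::TB);  // new NDD<0, TB>

int main() { return 0; }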
