@@ -68,164 +68,6 @@ defm ROR: ShiftRotate<"ror", MRM1r, MRM1m, rotr, WriteRotateCL, WriteRotate, Wri
68
68
defm RCL: ShiftRotate<"rcl", MRM2r, MRM2m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
69
69
defm RCR: ShiftRotate<"rcr", MRM3r, MRM3m, null_frag, WriteRotateCL, WriteRotate, WriteRotateCLLd, WriteRotateLd, [EFLAGS]>;
70
70
71
- //===----------------------------------------------------------------------===//
72
- // Double shift instructions (generalizations of rotate)
73
- //===----------------------------------------------------------------------===//
74
-
75
- let Defs = [EFLAGS], hasSideEffects = 0 in {
76
- let Constraints = "$src1 = $dst" in {
77
-
78
- let Uses = [CL], SchedRW = [WriteSHDrrcl] in {
79
- def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst),
80
- (ins GR16:$src1, GR16:$src2),
81
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
82
- [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2, CL))]>,
83
- TB, OpSize16;
84
- def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst),
85
- (ins GR16:$src1, GR16:$src2),
86
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
87
- [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1, CL))]>,
88
- TB, OpSize16;
89
- def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst),
90
- (ins GR32:$src1, GR32:$src2),
91
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
92
- [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2, CL))]>,
93
- TB, OpSize32;
94
- def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst),
95
- (ins GR32:$src1, GR32:$src2),
96
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
97
- [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1, CL))]>,
98
- TB, OpSize32;
99
- def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
100
- (ins GR64:$src1, GR64:$src2),
101
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
102
- [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2, CL))]>,
103
- TB;
104
- def SHRD64rrCL : RI<0xAD, MRMDestReg, (outs GR64:$dst),
105
- (ins GR64:$src1, GR64:$src2),
106
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
107
- [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1, CL))]>,
108
- TB;
109
- } // Uses, SchedRW
110
-
111
- let isCommutable = 1, SchedRW = [WriteSHDrri] in { // These instructions commute to each other.
112
- def SHLD16rri8 : Ii8<0xA4, MRMDestReg,
113
- (outs GR16:$dst),
114
- (ins GR16:$src1, GR16:$src2, u8imm:$src3),
115
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
116
- [(set GR16:$dst, (X86fshl GR16:$src1, GR16:$src2,
117
- (i8 imm:$src3)))]>,
118
- TB, OpSize16;
119
- def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
120
- (outs GR16:$dst),
121
- (ins GR16:$src1, GR16:$src2, u8imm:$src3),
122
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
123
- [(set GR16:$dst, (X86fshr GR16:$src2, GR16:$src1,
124
- (i8 imm:$src3)))]>,
125
- TB, OpSize16;
126
- def SHLD32rri8 : Ii8<0xA4, MRMDestReg,
127
- (outs GR32:$dst),
128
- (ins GR32:$src1, GR32:$src2, u8imm:$src3),
129
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
130
- [(set GR32:$dst, (fshl GR32:$src1, GR32:$src2,
131
- (i8 imm:$src3)))]>,
132
- TB, OpSize32;
133
- def SHRD32rri8 : Ii8<0xAC, MRMDestReg,
134
- (outs GR32:$dst),
135
- (ins GR32:$src1, GR32:$src2, u8imm:$src3),
136
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
137
- [(set GR32:$dst, (fshr GR32:$src2, GR32:$src1,
138
- (i8 imm:$src3)))]>,
139
- TB, OpSize32;
140
- def SHLD64rri8 : RIi8<0xA4, MRMDestReg,
141
- (outs GR64:$dst),
142
- (ins GR64:$src1, GR64:$src2, u8imm:$src3),
143
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
144
- [(set GR64:$dst, (fshl GR64:$src1, GR64:$src2,
145
- (i8 imm:$src3)))]>,
146
- TB;
147
- def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
148
- (outs GR64:$dst),
149
- (ins GR64:$src1, GR64:$src2, u8imm:$src3),
150
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
151
- [(set GR64:$dst, (fshr GR64:$src2, GR64:$src1,
152
- (i8 imm:$src3)))]>,
153
- TB;
154
- } // SchedRW
155
- } // Constraints = "$src = $dst"
156
-
157
- let Uses = [CL], SchedRW = [WriteSHDmrcl] in {
158
- def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
159
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
160
- [(store (X86fshl (loadi16 addr:$dst), GR16:$src2, CL),
161
- addr:$dst)]>, TB, OpSize16;
162
- def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
163
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, cl}",
164
- [(store (X86fshr GR16:$src2, (loadi16 addr:$dst), CL),
165
- addr:$dst)]>, TB, OpSize16;
166
-
167
- def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
168
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
169
- [(store (fshl (loadi32 addr:$dst), GR32:$src2, CL),
170
- addr:$dst)]>, TB, OpSize32;
171
- def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
172
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, cl}",
173
- [(store (fshr GR32:$src2, (loadi32 addr:$dst), CL),
174
- addr:$dst)]>, TB, OpSize32;
175
-
176
- def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
177
- "shld{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
178
- [(store (fshl (loadi64 addr:$dst), GR64:$src2, CL),
179
- addr:$dst)]>, TB;
180
- def SHRD64mrCL : RI<0xAD, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
181
- "shrd{q}\t{%cl, $src2, $dst|$dst, $src2, cl}",
182
- [(store (fshr GR64:$src2, (loadi64 addr:$dst), CL),
183
- addr:$dst)]>, TB;
184
- } // Uses, SchedRW
185
-
186
- let SchedRW = [WriteSHDmri] in {
187
- def SHLD16mri8 : Ii8<0xA4, MRMDestMem,
188
- (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
189
- "shld{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
190
- [(store (X86fshl (loadi16 addr:$dst), GR16:$src2,
191
- (i8 imm:$src3)), addr:$dst)]>,
192
- TB, OpSize16;
193
- def SHRD16mri8 : Ii8<0xAC, MRMDestMem,
194
- (outs), (ins i16mem:$dst, GR16:$src2, u8imm:$src3),
195
- "shrd{w}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
196
- [(store (X86fshr GR16:$src2, (loadi16 addr:$dst),
197
- (i8 imm:$src3)), addr:$dst)]>,
198
- TB, OpSize16;
199
-
200
- def SHLD32mri8 : Ii8<0xA4, MRMDestMem,
201
- (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
202
- "shld{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
203
- [(store (fshl (loadi32 addr:$dst), GR32:$src2,
204
- (i8 imm:$src3)), addr:$dst)]>,
205
- TB, OpSize32;
206
- def SHRD32mri8 : Ii8<0xAC, MRMDestMem,
207
- (outs), (ins i32mem:$dst, GR32:$src2, u8imm:$src3),
208
- "shrd{l}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
209
- [(store (fshr GR32:$src2, (loadi32 addr:$dst),
210
- (i8 imm:$src3)), addr:$dst)]>,
211
- TB, OpSize32;
212
-
213
- def SHLD64mri8 : RIi8<0xA4, MRMDestMem,
214
- (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
215
- "shld{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
216
- [(store (fshl (loadi64 addr:$dst), GR64:$src2,
217
- (i8 imm:$src3)), addr:$dst)]>,
218
- TB;
219
- def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
220
- (outs), (ins i64mem:$dst, GR64:$src2, u8imm:$src3),
221
- "shrd{q}\t{$src3, $src2, $dst|$dst, $src2, $src3}",
222
- [(store (fshr GR64:$src2, (loadi64 addr:$dst),
223
- (i8 imm:$src3)), addr:$dst)]>,
224
- TB;
225
- } // SchedRW
226
-
227
- } // Defs = [EFLAGS], hasSideEffects
228
-
229
71
// Use the opposite rotate if it allows us to use the rotate-by-1 instruction.
230
72
def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
231
73
def : Pat<(rotl GR16:$src1, (i8 15)), (ROR16r1 GR16:$src1)>;
@@ -254,6 +96,95 @@ def : Pat<(store (rotr (loadi32 addr:$dst), (i8 31)), addr:$dst),
254
96
def : Pat<(store (rotr (loadi64 addr:$dst), (i8 63)), addr:$dst),
255
97
(ROL64m1 addr:$dst)>, Requires<[In64BitMode]>;
256
98
99
+
100
+ // Patterns for rotate with relocImm for the immediate field.
101
+ def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
102
+ (ROL8ri GR8:$src1, relocImm:$src2)>;
103
+ def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
104
+ (ROL16ri GR16:$src1, relocImm:$src2)>;
105
+ def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
106
+ (ROL32ri GR32:$src1, relocImm:$src2)>;
107
+ def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
108
+ (ROL64ri GR64:$src1, relocImm:$src2)>;
109
+
110
+ def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
111
+ (ROR8ri GR8:$src1, relocImm:$src2)>;
112
+ def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
113
+ (ROR16ri GR16:$src1, relocImm:$src2)>;
114
+ def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
115
+ (ROR32ri GR32:$src1, relocImm:$src2)>;
116
+ def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
117
+ (ROR64ri GR64:$src1, relocImm:$src2)>;
118
+
119
+ //===----------------------------------------------------------------------===//
120
+ // Double precision shift instructions (generalizations of rotate)
121
+ //===----------------------------------------------------------------------===//
122
+
123
+ class ShlrdOpRRI8U_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> // Double shift, register destination, 8-bit immediate count ($src3).
124
+ : ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
125
+ (ins t.RegClass:$src1, t.RegClass:$src2, u8imm:$src3), m, triop_args,
126
+ []>, NDD<0, TB> {
127
+ let isCommutable = 1; // The shld/shrd immediate forms commute to each other.
128
+ let ImmT = Imm8; // The shift count is encoded as an 8-bit immediate.
129
+ let SchedRW = [WriteSHDrri];
130
+ let Pattern = !if(!eq(m, "shld"), // Operand order passed to `node` depends on the mnemonic.
131
+ [(set t.RegClass:$dst, (node t.RegClass:$src1, t.RegClass:$src2, (i8 imm:$src3)))], // m == "shld": (node $src1, $src2, count)
132
+ [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, (i8 imm:$src3)))]); // otherwise (shrd): (node $src2, $src1, count)
133
+ }
134
+
135
+ class ShlrdOpRRC_R<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> // Double shift, register destination, shift count taken from CL.
135
+ : BinOpRR<o, m, triop_cl_args, t, (outs t.RegClass:$dst), []>, NDD<0, TB> {
136
+ let Uses = [CL]; // CL is an implicit input: it supplies the shift count in the pattern below.
137
+ let SchedRW = [WriteSHDrrcl];
138
+ let Pattern = !if(!eq(m, "shld"), // Operand order passed to `node` depends on the mnemonic.
139
+ [(set t.RegClass:$dst, (node t.RegClass:$src1, t.RegClass:$src2, CL))], // m == "shld": (node $src1, $src2, CL)
140
+ [(set t.RegClass:$dst, (node t.RegClass:$src2, t.RegClass:$src1, CL))]); // otherwise (shrd): (node $src2, $src1, CL)
141
+ }
143
+
144
+ class ShlrdOpMRI8U_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> // Double shift, memory destination ($src1), 8-bit immediate count ($src3).
144
+ : ITy<o, MRMDestMem, t, (outs), (ins t.MemOperand:$src1, t.RegClass:$src2, u8imm:$src3),
145
+ m, triop_args, []>, TB {
146
+ let ImmT = Imm8; // The shift count is encoded as an 8-bit immediate.
147
+ let SchedRW = [WriteSHDmri];
148
+ let mayLoad = 1; // The pattern loads the old value from addr:$src1 ...
149
+ let mayStore = 1; // ... and stores the shifted result back to the same address.
150
+ let Pattern = !if(!eq(m, "shld"), // Operand order passed to `node` depends on the mnemonic.
151
+ [(store (node (t.LoadNode addr:$src1), t.RegClass:$src2, (i8 imm:$src3)), addr:$src1)], // m == "shld": (node load, $src2, count)
152
+ [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), (i8 imm:$src3)), addr:$src1)]); // otherwise (shrd): (node $src2, load, count)
153
+ }
155
+
156
+ class ShlrdOpMRC_M<bits<8> o, string m, X86TypeInfo t, SDPatternOperator node> // Double shift, memory destination ($src1), shift count taken from CL.
156
+ : BinOpMR<o, m, triop_cl_args, t, (outs), []>, TB {
157
+ let Uses = [CL]; // CL is an implicit input: it supplies the shift count in the pattern below.
158
+ let SchedRW = [WriteSHDmrcl];
159
+ let mayStore = 1; // NOTE(review): mayLoad is not set here, unlike ShlrdOpMRI8U_M; presumably inherited from BinOpMR -- confirm.
160
+ let Pattern = !if(!eq(m, "shld"), // Operand order passed to `node` depends on the mnemonic.
161
+ [(store (node (t.LoadNode addr:$src1), t.RegClass:$src2, CL), addr:$src1)], // m == "shld": (node load, $src2, CL)
162
+ [(store (node t.RegClass:$src2, (t.LoadNode addr:$src1), CL), addr:$src1)]); // otherwise (shrd): (node $src2, load, CL)
163
+ }
165
+
166
+ multiclass Shlrd<bits<8> o1, bits<8> o2, string m, SDPatternOperator node, SDPatternOperator t_node> { // o1: opcode of the imm8 forms; o2: opcode of the CL forms; t_node: selection node for 16-bit; node: for 32/64-bit.
166
+
167
+ def 16rri8 : ShlrdOpRRI8U_R<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16; // Register destination, imm8 count.
168
+ def 32rri8 : ShlrdOpRRI8U_R<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
169
+ def 64rri8 : ShlrdOpRRI8U_R<o1, m, Xi64, node>, DefEFLAGS;
170
+
171
+ def 16rrCL : ShlrdOpRRC_R<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16; // Register destination, count in CL.
172
+ def 32rrCL : ShlrdOpRRC_R<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
173
+ def 64rrCL : ShlrdOpRRC_R<o2, m, Xi64, node>, DefEFLAGS;
174
+
175
+ def 16mri8 : ShlrdOpMRI8U_M<o1, m, Xi16, t_node>, DefEFLAGS, OpSize16; // Memory destination, imm8 count.
176
+ def 32mri8 : ShlrdOpMRI8U_M<o1, m, Xi32, node>, DefEFLAGS, OpSize32;
177
+ def 64mri8 : ShlrdOpMRI8U_M<o1, m, Xi64, node>, DefEFLAGS;
178
+
179
+ def 16mrCL : ShlrdOpMRC_M<o2, m, Xi16, t_node>, DefEFLAGS, OpSize16; // Memory destination, count in CL.
180
+ def 32mrCL : ShlrdOpMRC_M<o2, m, Xi32, node>, DefEFLAGS, OpSize32;
181
+ def 64mrCL : ShlrdOpMRC_M<o2, m, Xi64, node>, DefEFLAGS;
182
+ }
184
+
185
+ defm SHLD : Shlrd<0xA4, 0xA5, "shld", fshl, X86fshl>; // 0xA4: imm8 forms, 0xA5: CL forms; fshl selects 32/64-bit, X86fshl selects 16-bit.
185
+ defm SHRD : Shlrd<0xAC, 0xAD, "shrd", fshr, X86fshr>; // 0xAC: imm8 forms, 0xAD: CL forms; fshr selects 32/64-bit, X86fshr selects 16-bit.
187
+
257
188
// Sandy Bridge and newer Intel processors support faster rotates using
258
189
// SHLD to avoid a partial flag update on the normal rotate instructions.
259
190
// Use a pseudo so that TwoInstructionPass and register allocation will see
@@ -276,6 +207,10 @@ let Predicates = [HasFastSHLDRotate], AddedComplexity = 5,
276
207
[(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$shamt)))]>;
277
208
}
278
209
210
+ //===----------------------------------------------------------------------===//
211
+ // BMI Shift/Rotate instructions
212
+ //===----------------------------------------------------------------------===//
213
+
279
214
def ROT32L2R_imm8 : SDNodeXForm<imm, [{
280
215
// Convert a ROTL shamt to a ROTR shamt on 32-bit integer.
281
216
return getI8Imm(32 - N->getZExtValue(), SDLoc(N));
@@ -446,21 +381,3 @@ let Predicates = [HasBMI2] in {
446
381
(INSERT_SUBREG
447
382
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
448
383
}
449
-
450
- def : Pat<(rotl GR8:$src1, (i8 relocImm:$src2)),
451
- (ROL8ri GR8:$src1, relocImm:$src2)>;
452
- def : Pat<(rotl GR16:$src1, (i8 relocImm:$src2)),
453
- (ROL16ri GR16:$src1, relocImm:$src2)>;
454
- def : Pat<(rotl GR32:$src1, (i8 relocImm:$src2)),
455
- (ROL32ri GR32:$src1, relocImm:$src2)>;
456
- def : Pat<(rotl GR64:$src1, (i8 relocImm:$src2)),
457
- (ROL64ri GR64:$src1, relocImm:$src2)>;
458
-
459
- def : Pat<(rotr GR8:$src1, (i8 relocImm:$src2)),
460
- (ROR8ri GR8:$src1, relocImm:$src2)>;
461
- def : Pat<(rotr GR16:$src1, (i8 relocImm:$src2)),
462
- (ROR16ri GR16:$src1, relocImm:$src2)>;
463
- def : Pat<(rotr GR32:$src1, (i8 relocImm:$src2)),
464
- (ROR32ri GR32:$src1, relocImm:$src2)>;
465
- def : Pat<(rotr GR64:$src1, (i8 relocImm:$src2)),
466
- (ROR64ri GR64:$src1, relocImm:$src2)>;
0 commit comments