Skip to content

Commit f44079d

Browse files
authored
[ISel] Add pattern matching for depositing subreg value (#75978)
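Depositing a value into the lowest byte/word of a wider register is a common code pattern. This patch adds X86 instruction-selection patterns for it so that the value is loaded directly into the 8/16-bit subregister, avoiding redundant AND and OR operations.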
1 parent 6a870cc commit f44079d

File tree

2 files changed

+110
-0
lines changed

2 files changed

+110
-0
lines changed

llvm/lib/Target/X86/X86InstrCompiler.td

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1515,6 +1515,23 @@ def : Pat<(X86add_flag_nocf GR32:$src1, 128),
15151515
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
15161516
(SUB64ri32 GR64:$src1, -128)>;
15171517

1518+
// Depositing a value into the low 8/16-bit subregister:
//   (or (and $dst, <mask>), (zextload mem))
// The AND with -256 / -65536 clears the low 8 / 16 bits of $dst, and the OR
// fills them with the zero-extended 8 / 16-bit load. Each pattern below
// selects this as an 8/16-bit load (MOV8rm / MOV16rm) inserted into the
// sub_8bit / sub_16bit subregister of a copy of $dst, so the result contains
// no separate AND or OR instruction.
1519+
// 64-bit destination, low 8 bits replaced by a byte load.
def : Pat<(or (and GR64:$dst, -256),
1520+
(i64 (zextloadi8 addr:$src))),
1521+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1522+
1523+
// 32-bit destination, low 8 bits replaced by a byte load.
def : Pat<(or (and GR32:$dst, -256),
1524+
(i32 (zextloadi8 addr:$src))),
1525+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV8rm i8mem:$src), sub_8bit)>;
1526+
1527+
// 64-bit destination, low 16 bits replaced by a word load.
def : Pat<(or (and GR64:$dst, -65536),
1528+
(i64 (zextloadi16 addr:$src))),
1529+
(INSERT_SUBREG (i64 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1530+
1531+
// 32-bit destination, low 16 bits replaced by a word load.
def : Pat<(or (and GR32:$dst, -65536),
1532+
(i32 (zextloadi16 addr:$src))),
1533+
(INSERT_SUBREG (i32 (COPY $dst)), (MOV16rm i16mem:$src), sub_16bit)>;
1534+
15181535
// The same trick applies for 32-bit immediate fields in 64-bit
15191536
// instructions.
15201537
def : Pat<(add GR64:$src1, 0x0000000080000000),

llvm/test/CodeGen/X86/insert.ll

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2+
; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s --check-prefixes=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
4+
5+
; Replaces the low 8 bits of the i64 %res with the byte loaded from %byte:
; %res is masked with -256 and OR'ed with the zero-extended i8 load.
; The checks for both targets expect the byte to be loaded with a single movb
; into %al, with no and/or instructions.
define i64 @sub8(i64 noundef %res, ptr %byte) {
6+
; X86-LABEL: sub8:
7+
; X86: # %bb.0: # %entry
8+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
9+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
10+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
11+
; X86-NEXT: movb (%ecx), %al
12+
; X86-NEXT: retl
13+
;
14+
; X64-LABEL: sub8:
15+
; X64: # %bb.0: # %entry
16+
; X64-NEXT: movq %rdi, %rax
17+
; X64-NEXT: movb (%rsi), %al
18+
; X64-NEXT: retq
19+
entry:
20+
%and = and i64 %res, -256
21+
%d = load i8, ptr %byte, align 1
22+
%conv2 = zext i8 %d to i64
23+
%or = or i64 %and, %conv2
24+
ret i64 %or
25+
}
26+
27+
; Replaces the low 16 bits of the i64 %res with the i16 loaded from %byte:
; %res is masked with -65536 and OR'ed with the zero-extended i16 load.
; The x86-64 checks expect a single movw into %ax; the i386 checks expect a
; movzwl/shll/orl sequence instead.
define i64 @sub16(i64 noundef %res, ptr %byte) {
28+
; X86-LABEL: sub16:
29+
; X86: # %bb.0: # %entry
30+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
31+
; X86-NEXT: shll $16, %ecx
32+
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
33+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
34+
; X86-NEXT: movzwl (%eax), %eax
35+
; X86-NEXT: orl %ecx, %eax
36+
; X86-NEXT: retl
37+
;
38+
; X64-LABEL: sub16:
39+
; X64: # %bb.0: # %entry
40+
; X64-NEXT: movq %rdi, %rax
41+
; X64-NEXT: movw (%rsi), %ax
42+
; X64-NEXT: retq
43+
entry:
44+
%and = and i64 %res, -65536
45+
%d = load i16, ptr %byte, align 1
46+
%conv2 = zext i16 %d to i64
47+
%or = or i64 %and, %conv2
48+
ret i64 %or
49+
}
50+
51+
; 32-bit variant of the byte deposit: the low 8 bits of the i32 %res are
; replaced with the byte loaded from %byte (mask -256, then OR with the
; zero-extended i8 load).
; The checks for both targets expect a single movb into %al.
define i32 @sub8_32(i32 noundef %res, ptr %byte) {
52+
; X86-LABEL: sub8_32:
53+
; X86: # %bb.0: # %entry
54+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
55+
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
56+
; X86-NEXT: movb (%ecx), %al
57+
; X86-NEXT: retl
58+
;
59+
; X64-LABEL: sub8_32:
60+
; X64: # %bb.0: # %entry
61+
; X64-NEXT: movl %edi, %eax
62+
; X64-NEXT: movb (%rsi), %al
63+
; X64-NEXT: retq
64+
entry:
65+
%and = and i32 %res, -256
66+
%d = load i8, ptr %byte, align 1
67+
%conv2 = zext i8 %d to i32
68+
%or = or i32 %and, %conv2
69+
ret i32 %or
70+
}
71+
72+
; 32-bit variant of the word deposit: the low 16 bits of the i32 %res are
; replaced with the i16 loaded from %byte (mask -65536, then OR with the
; zero-extended i16 load).
; The x86-64 checks expect a single movw into %ax; the i386 checks expect a
; movzwl/shll/orl sequence instead.
define i32 @sub16_32(i32 noundef %res, ptr %byte) {
73+
; X86-LABEL: sub16_32:
74+
; X86: # %bb.0: # %entry
75+
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
76+
; X86-NEXT: shll $16, %ecx
77+
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
78+
; X86-NEXT: movzwl (%eax), %eax
79+
; X86-NEXT: orl %ecx, %eax
80+
; X86-NEXT: retl
81+
;
82+
; X64-LABEL: sub16_32:
83+
; X64: # %bb.0: # %entry
84+
; X64-NEXT: movl %edi, %eax
85+
; X64-NEXT: movw (%rsi), %ax
86+
; X64-NEXT: retq
87+
entry:
88+
%and = and i32 %res, -65536
89+
%d = load i16, ptr %byte, align 1
90+
%conv2 = zext i16 %d to i32
91+
%or = or i32 %and, %conv2
92+
ret i32 %or
93+
}

0 commit comments

Comments
 (0)