[X86]Add NO_REVERSE attribute to X86 RMW instrs in memfold table #67288
Conversation
@llvm/pr-subscribers-backend-x86 Changes: X86 doesn't want to unfold RMW instrs into 1 load + 1 op + 1 store, because RMW can save code size. Full diff: https://github.com/llvm/llvm-project/pull/67288.diff (2 files affected)
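For orientation, a minimal self-contained sketch of how such a table entry gates unfolding; the struct mirrors the generated X86MemoryFoldTableEntry, but the flag value and the canUnfold helper are hypothetical, for illustration only.

#include <cstdint>

// Hypothetical mirror of a generated fold-table entry; illustration only.
struct X86MemoryFoldTableEntry {
  unsigned RegOp;  // register form, e.g. X86::ADD32rr
  unsigned MemOp;  // memory (RMW) form, e.g. X86::ADD32mr
  uint16_t Flags;  // TB_NO_REVERSE forbids unfolding MemOp back to RegOp
};

constexpr uint16_t TB_NO_REVERSE = 1 << 0;  // placeholder bit, not LLVM's value

// Folding RegOp -> MemOp is always permitted by an entry; the reverse
// direction (splitting into load + op + store) is what TB_NO_REVERSE blocks.
bool canUnfold(const X86MemoryFoldTableEntry &E) {
  return (E.Flags & TB_NO_REVERSE) == 0;
}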
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 0e6b8b04f3ca758..498469a55c4f043 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -7,211 +7,211 @@ static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
{X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE},
{X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE},
{X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE},
- {X86::ADC16ri, X86::ADC16mi, 0},
- {X86::ADC16ri8, X86::ADC16mi8, 0},
- {X86::ADC16rr, X86::ADC16mr, 0},
- {X86::ADC32ri, X86::ADC32mi, 0},
- {X86::ADC32ri8, X86::ADC32mi8, 0},
- {X86::ADC32rr, X86::ADC32mr, 0},
- {X86::ADC64ri32, X86::ADC64mi32, 0},
- {X86::ADC64ri8, X86::ADC64mi8, 0},
- {X86::ADC64rr, X86::ADC64mr, 0},
- {X86::ADC8ri, X86::ADC8mi, 0},
- {X86::ADC8ri8, X86::ADC8mi8, 0},
- {X86::ADC8rr, X86::ADC8mr, 0},
- {X86::ADD16ri, X86::ADD16mi, 0},
- {X86::ADD16ri8, X86::ADD16mi8, 0},
- {X86::ADD16rr, X86::ADD16mr, 0},
- {X86::ADD32ri, X86::ADD32mi, 0},
- {X86::ADD32ri8, X86::ADD32mi8, 0},
- {X86::ADD32rr, X86::ADD32mr, 0},
- {X86::ADD64ri32, X86::ADD64mi32, 0},
- {X86::ADD64ri8, X86::ADD64mi8, 0},
- {X86::ADD64rr, X86::ADD64mr, 0},
- {X86::ADD8ri, X86::ADD8mi, 0},
- {X86::ADD8ri8, X86::ADD8mi8, 0},
- {X86::ADD8rr, X86::ADD8mr, 0},
- {X86::AND16ri, X86::AND16mi, 0},
- {X86::AND16ri8, X86::AND16mi8, 0},
- {X86::AND16rr, X86::AND16mr, 0},
- {X86::AND32ri, X86::AND32mi, 0},
- {X86::AND32ri8, X86::AND32mi8, 0},
- {X86::AND32rr, X86::AND32mr, 0},
- {X86::AND64ri32, X86::AND64mi32, 0},
- {X86::AND64ri8, X86::AND64mi8, 0},
- {X86::AND64rr, X86::AND64mr, 0},
- {X86::AND8ri, X86::AND8mi, 0},
- {X86::AND8ri8, X86::AND8mi8, 0},
- {X86::AND8rr, X86::AND8mr, 0},
- {X86::BTC16ri8, X86::BTC16mi8, 0},
- {X86::BTC32ri8, X86::BTC32mi8, 0},
- {X86::BTC64ri8, X86::BTC64mi8, 0},
- {X86::BTR16ri8, X86::BTR16mi8, 0},
- {X86::BTR32ri8, X86::BTR32mi8, 0},
- {X86::BTR64ri8, X86::BTR64mi8, 0},
- {X86::BTS16ri8, X86::BTS16mi8, 0},
- {X86::BTS32ri8, X86::BTS32mi8, 0},
- {X86::BTS64ri8, X86::BTS64mi8, 0},
- {X86::DEC16r, X86::DEC16m, 0},
- {X86::DEC32r, X86::DEC32m, 0},
- {X86::DEC64r, X86::DEC64m, 0},
- {X86::DEC8r, X86::DEC8m, 0},
- {X86::INC16r, X86::INC16m, 0},
- {X86::INC32r, X86::INC32m, 0},
- {X86::INC64r, X86::INC64m, 0},
- {X86::INC8r, X86::INC8m, 0},
- {X86::NEG16r, X86::NEG16m, 0},
- {X86::NEG32r, X86::NEG32m, 0},
- {X86::NEG64r, X86::NEG64m, 0},
- {X86::NEG8r, X86::NEG8m, 0},
- {X86::NOT16r, X86::NOT16m, 0},
- {X86::NOT32r, X86::NOT32m, 0},
- {X86::NOT64r, X86::NOT64m, 0},
- {X86::NOT8r, X86::NOT8m, 0},
- {X86::OR16ri, X86::OR16mi, 0},
- {X86::OR16ri8, X86::OR16mi8, 0},
- {X86::OR16rr, X86::OR16mr, 0},
- {X86::OR32ri, X86::OR32mi, 0},
- {X86::OR32ri8, X86::OR32mi8, 0},
- {X86::OR32rr, X86::OR32mr, 0},
- {X86::OR64ri32, X86::OR64mi32, 0},
- {X86::OR64ri8, X86::OR64mi8, 0},
- {X86::OR64rr, X86::OR64mr, 0},
- {X86::OR8ri, X86::OR8mi, 0},
- {X86::OR8ri8, X86::OR8mi8, 0},
- {X86::OR8rr, X86::OR8mr, 0},
- {X86::RCL16r1, X86::RCL16m1, 0},
- {X86::RCL16rCL, X86::RCL16mCL, 0},
- {X86::RCL16ri, X86::RCL16mi, 0},
- {X86::RCL32r1, X86::RCL32m1, 0},
- {X86::RCL32rCL, X86::RCL32mCL, 0},
- {X86::RCL32ri, X86::RCL32mi, 0},
- {X86::RCL64r1, X86::RCL64m1, 0},
- {X86::RCL64rCL, X86::RCL64mCL, 0},
- {X86::RCL64ri, X86::RCL64mi, 0},
- {X86::RCL8r1, X86::RCL8m1, 0},
- {X86::RCL8rCL, X86::RCL8mCL, 0},
- {X86::RCL8ri, X86::RCL8mi, 0},
- {X86::RCR16r1, X86::RCR16m1, 0},
- {X86::RCR16rCL, X86::RCR16mCL, 0},
- {X86::RCR16ri, X86::RCR16mi, 0},
- {X86::RCR32r1, X86::RCR32m1, 0},
- {X86::RCR32rCL, X86::RCR32mCL, 0},
- {X86::RCR32ri, X86::RCR32mi, 0},
- {X86::RCR64r1, X86::RCR64m1, 0},
- {X86::RCR64rCL, X86::RCR64mCL, 0},
- {X86::RCR64ri, X86::RCR64mi, 0},
- {X86::RCR8r1, X86::RCR8m1, 0},
- {X86::RCR8rCL, X86::RCR8mCL, 0},
- {X86::RCR8ri, X86::RCR8mi, 0},
- {X86::ROL16r1, X86::ROL16m1, 0},
- {X86::ROL16rCL, X86::ROL16mCL, 0},
- {X86::ROL16ri, X86::ROL16mi, 0},
- {X86::ROL32r1, X86::ROL32m1, 0},
- {X86::ROL32rCL, X86::ROL32mCL, 0},
- {X86::ROL32ri, X86::ROL32mi, 0},
- {X86::ROL64r1, X86::ROL64m1, 0},
- {X86::ROL64rCL, X86::ROL64mCL, 0},
- {X86::ROL64ri, X86::ROL64mi, 0},
- {X86::ROL8r1, X86::ROL8m1, 0},
- {X86::ROL8rCL, X86::ROL8mCL, 0},
- {X86::ROL8ri, X86::ROL8mi, 0},
- {X86::ROR16r1, X86::ROR16m1, 0},
- {X86::ROR16rCL, X86::ROR16mCL, 0},
- {X86::ROR16ri, X86::ROR16mi, 0},
- {X86::ROR32r1, X86::ROR32m1, 0},
- {X86::ROR32rCL, X86::ROR32mCL, 0},
- {X86::ROR32ri, X86::ROR32mi, 0},
- {X86::ROR64r1, X86::ROR64m1, 0},
- {X86::ROR64rCL, X86::ROR64mCL, 0},
- {X86::ROR64ri, X86::ROR64mi, 0},
- {X86::ROR8r1, X86::ROR8m1, 0},
- {X86::ROR8rCL, X86::ROR8mCL, 0},
- {X86::ROR8ri, X86::ROR8mi, 0},
- {X86::SAR16r1, X86::SAR16m1, 0},
- {X86::SAR16rCL, X86::SAR16mCL, 0},
- {X86::SAR16ri, X86::SAR16mi, 0},
- {X86::SAR32r1, X86::SAR32m1, 0},
- {X86::SAR32rCL, X86::SAR32mCL, 0},
- {X86::SAR32ri, X86::SAR32mi, 0},
- {X86::SAR64r1, X86::SAR64m1, 0},
- {X86::SAR64rCL, X86::SAR64mCL, 0},
- {X86::SAR64ri, X86::SAR64mi, 0},
- {X86::SAR8r1, X86::SAR8m1, 0},
- {X86::SAR8rCL, X86::SAR8mCL, 0},
- {X86::SAR8ri, X86::SAR8mi, 0},
- {X86::SBB16ri, X86::SBB16mi, 0},
- {X86::SBB16ri8, X86::SBB16mi8, 0},
- {X86::SBB16rr, X86::SBB16mr, 0},
- {X86::SBB32ri, X86::SBB32mi, 0},
- {X86::SBB32ri8, X86::SBB32mi8, 0},
- {X86::SBB32rr, X86::SBB32mr, 0},
- {X86::SBB64ri32, X86::SBB64mi32, 0},
- {X86::SBB64ri8, X86::SBB64mi8, 0},
- {X86::SBB64rr, X86::SBB64mr, 0},
- {X86::SBB8ri, X86::SBB8mi, 0},
- {X86::SBB8ri8, X86::SBB8mi8, 0},
- {X86::SBB8rr, X86::SBB8mr, 0},
- {X86::SHL16r1, X86::SHL16m1, 0},
- {X86::SHL16rCL, X86::SHL16mCL, 0},
- {X86::SHL16ri, X86::SHL16mi, 0},
- {X86::SHL32r1, X86::SHL32m1, 0},
- {X86::SHL32rCL, X86::SHL32mCL, 0},
- {X86::SHL32ri, X86::SHL32mi, 0},
- {X86::SHL64r1, X86::SHL64m1, 0},
- {X86::SHL64rCL, X86::SHL64mCL, 0},
- {X86::SHL64ri, X86::SHL64mi, 0},
- {X86::SHL8r1, X86::SHL8m1, 0},
- {X86::SHL8rCL, X86::SHL8mCL, 0},
- {X86::SHL8ri, X86::SHL8mi, 0},
- {X86::SHLD16rrCL, X86::SHLD16mrCL, 0},
- {X86::SHLD16rri8, X86::SHLD16mri8, 0},
- {X86::SHLD32rrCL, X86::SHLD32mrCL, 0},
- {X86::SHLD32rri8, X86::SHLD32mri8, 0},
- {X86::SHLD64rrCL, X86::SHLD64mrCL, 0},
- {X86::SHLD64rri8, X86::SHLD64mri8, 0},
- {X86::SHR16r1, X86::SHR16m1, 0},
- {X86::SHR16rCL, X86::SHR16mCL, 0},
- {X86::SHR16ri, X86::SHR16mi, 0},
- {X86::SHR32r1, X86::SHR32m1, 0},
- {X86::SHR32rCL, X86::SHR32mCL, 0},
- {X86::SHR32ri, X86::SHR32mi, 0},
- {X86::SHR64r1, X86::SHR64m1, 0},
- {X86::SHR64rCL, X86::SHR64mCL, 0},
- {X86::SHR64ri, X86::SHR64mi, 0},
- {X86::SHR8r1, X86::SHR8m1, 0},
- {X86::SHR8rCL, X86::SHR8mCL, 0},
- {X86::SHR8ri, X86::SHR8mi, 0},
- {X86::SHRD16rrCL, X86::SHRD16mrCL, 0},
- {X86::SHRD16rri8, X86::SHRD16mri8, 0},
- {X86::SHRD32rrCL, X86::SHRD32mrCL, 0},
- {X86::SHRD32rri8, X86::SHRD32mri8, 0},
- {X86::SHRD64rrCL, X86::SHRD64mrCL, 0},
- {X86::SHRD64rri8, X86::SHRD64mri8, 0},
- {X86::SUB16ri, X86::SUB16mi, 0},
- {X86::SUB16ri8, X86::SUB16mi8, 0},
- {X86::SUB16rr, X86::SUB16mr, 0},
- {X86::SUB32ri, X86::SUB32mi, 0},
- {X86::SUB32ri8, X86::SUB32mi8, 0},
- {X86::SUB32rr, X86::SUB32mr, 0},
- {X86::SUB64ri32, X86::SUB64mi32, 0},
- {X86::SUB64ri8, X86::SUB64mi8, 0},
- {X86::SUB64rr, X86::SUB64mr, 0},
- {X86::SUB8ri, X86::SUB8mi, 0},
- {X86::SUB8ri8, X86::SUB8mi8, 0},
- {X86::SUB8rr, X86::SUB8mr, 0},
- {X86::XOR16ri, X86::XOR16mi, 0},
- {X86::XOR16ri8, X86::XOR16mi8, 0},
- {X86::XOR16rr, X86::XOR16mr, 0},
- {X86::XOR32ri, X86::XOR32mi, 0},
- {X86::XOR32ri8, X86::XOR32mi8, 0},
- {X86::XOR32rr, X86::XOR32mr, 0},
- {X86::XOR64ri32, X86::XOR64mi32, 0},
- {X86::XOR64ri8, X86::XOR64mi8, 0},
- {X86::XOR64rr, X86::XOR64mr, 0},
- {X86::XOR8ri, X86::XOR8mi, 0},
- {X86::XOR8ri8, X86::XOR8mi8, 0},
- {X86::XOR8rr, X86::XOR8mr, 0},
+ {X86::ADC16ri, X86::ADC16mi, TB_NO_REVERSE},
+ {X86::ADC16ri8, X86::ADC16mi8, TB_NO_REVERSE},
+ {X86::ADC16rr, X86::ADC16mr, TB_NO_REVERSE},
+ {X86::ADC32ri, X86::ADC32mi, TB_NO_REVERSE},
+ {X86::ADC32ri8, X86::ADC32mi8, TB_NO_REVERSE},
+ {X86::ADC32rr, X86::ADC32mr, TB_NO_REVERSE},
+ {X86::ADC64ri32, X86::ADC64mi32, TB_NO_REVERSE},
+ {X86::ADC64ri8, X86::ADC64mi8, TB_NO_REVERSE},
+ {X86::ADC64rr, X86::ADC64mr, TB_NO_REVERSE},
+ {X86::ADC8ri, X86::ADC8mi, TB_NO_REVERSE},
+ {X86::ADC8ri8, X86::ADC8mi8, TB_NO_REVERSE},
+ {X86::ADC8rr, X86::ADC8mr, TB_NO_REVERSE},
+ {X86::ADD16ri, X86::ADD16mi, TB_NO_REVERSE},
+ {X86::ADD16ri8, X86::ADD16mi8, TB_NO_REVERSE},
+ {X86::ADD16rr, X86::ADD16mr, TB_NO_REVERSE},
+ {X86::ADD32ri, X86::ADD32mi, TB_NO_REVERSE},
+ {X86::ADD32ri8, X86::ADD32mi8, TB_NO_REVERSE},
+ {X86::ADD32rr, X86::ADD32mr, TB_NO_REVERSE},
+ {X86::ADD64ri32, X86::ADD64mi32, TB_NO_REVERSE},
+ {X86::ADD64ri8, X86::ADD64mi8, TB_NO_REVERSE},
+ {X86::ADD64rr, X86::ADD64mr, TB_NO_REVERSE},
+ {X86::ADD8ri, X86::ADD8mi, TB_NO_REVERSE},
+ {X86::ADD8ri8, X86::ADD8mi8, TB_NO_REVERSE},
+ {X86::ADD8rr, X86::ADD8mr, TB_NO_REVERSE},
+ {X86::AND16ri, X86::AND16mi, TB_NO_REVERSE},
+ {X86::AND16ri8, X86::AND16mi8, TB_NO_REVERSE},
+ {X86::AND16rr, X86::AND16mr, TB_NO_REVERSE},
+ {X86::AND32ri, X86::AND32mi, TB_NO_REVERSE},
+ {X86::AND32ri8, X86::AND32mi8, TB_NO_REVERSE},
+ {X86::AND32rr, X86::AND32mr, TB_NO_REVERSE},
+ {X86::AND64ri32, X86::AND64mi32, TB_NO_REVERSE},
+ {X86::AND64ri8, X86::AND64mi8, TB_NO_REVERSE},
+ {X86::AND64rr, X86::AND64mr, TB_NO_REVERSE},
+ {X86::AND8ri, X86::AND8mi, TB_NO_REVERSE},
+ {X86::AND8ri8, X86::AND8mi8, TB_NO_REVERSE},
+ {X86::AND8rr, X86::AND8mr, TB_NO_REVERSE},
+ {X86::BTC16ri8, X86::BTC16mi8, TB_NO_REVERSE},
+ {X86::BTC32ri8, X86::BTC32mi8, TB_NO_REVERSE},
+ {X86::BTC64ri8, X86::BTC64mi8, TB_NO_REVERSE},
+ {X86::BTR16ri8, X86::BTR16mi8, TB_NO_REVERSE},
+ {X86::BTR32ri8, X86::BTR32mi8, TB_NO_REVERSE},
+ {X86::BTR64ri8, X86::BTR64mi8, TB_NO_REVERSE},
+ {X86::BTS16ri8, X86::BTS16mi8, TB_NO_REVERSE},
+ {X86::BTS32ri8, X86::BTS32mi8, TB_NO_REVERSE},
+ {X86::BTS64ri8, X86::BTS64mi8, TB_NO_REVERSE},
+ {X86::DEC16r, X86::DEC16m, TB_NO_REVERSE},
+ {X86::DEC32r, X86::DEC32m, TB_NO_REVERSE},
+ {X86::DEC64r, X86::DEC64m, TB_NO_REVERSE},
+ {X86::DEC8r, X86::DEC8m, TB_NO_REVERSE},
+ {X86::INC16r, X86::INC16m, TB_NO_REVERSE},
+ {X86::INC32r, X86::INC32m, TB_NO_REVERSE},
+ {X86::INC64r, X86::INC64m, TB_NO_REVERSE},
+ {X86::INC8r, X86::INC8m, TB_NO_REVERSE},
+ {X86::NEG16r, X86::NEG16m, TB_NO_REVERSE},
+ {X86::NEG32r, X86::NEG32m, TB_NO_REVERSE},
+ {X86::NEG64r, X86::NEG64m, TB_NO_REVERSE},
+ {X86::NEG8r, X86::NEG8m, TB_NO_REVERSE},
+ {X86::NOT16r, X86::NOT16m, TB_NO_REVERSE},
+ {X86::NOT32r, X86::NOT32m, TB_NO_REVERSE},
+ {X86::NOT64r, X86::NOT64m, TB_NO_REVERSE},
+ {X86::NOT8r, X86::NOT8m, TB_NO_REVERSE},
+ {X86::OR16ri, X86::OR16mi, TB_NO_REVERSE},
+ {X86::OR16ri8, X86::OR16mi8, TB_NO_REVERSE},
+ {X86::OR16rr, X86::OR16mr, TB_NO_REVERSE},
+ {X86::OR32ri, X86::OR32mi, TB_NO_REVERSE},
+ {X86::OR32ri8, X86::OR32mi8, TB_NO_REVERSE},
+ {X86::OR32rr, X86::OR32mr, TB_NO_REVERSE},
+ {X86::OR64ri32, X86::OR64mi32, TB_NO_REVERSE},
+ {X86::OR64ri8, X86::OR64mi8, TB_NO_REVERSE},
+ {X86::OR64rr, X86::OR64mr, TB_NO_REVERSE},
+ {X86::OR8ri, X86::OR8mi, TB_NO_REVERSE},
+ {X86::OR8ri8, X86::OR8mi8, TB_NO_REVERSE},
+ {X86::OR8rr, X86::OR8mr, TB_NO_REVERSE},
+ {X86::RCL16r1, X86::RCL16m1, TB_NO_REVERSE},
+ {X86::RCL16rCL, X86::RCL16mCL, TB_NO_REVERSE},
+ {X86::RCL16ri, X86::RCL16mi, TB_NO_REVERSE},
+ {X86::RCL32r1, X86::RCL32m1, TB_NO_REVERSE},
+ {X86::RCL32rCL, X86::RCL32mCL, TB_NO_REVERSE},
+ {X86::RCL32ri, X86::RCL32mi, TB_NO_REVERSE},
+ {X86::RCL64r1, X86::RCL64m1, TB_NO_REVERSE},
+ {X86::RCL64rCL, X86::RCL64mCL, TB_NO_REVERSE},
+ {X86::RCL64ri, X86::RCL64mi, TB_NO_REVERSE},
+ {X86::RCL8r1, X86::RCL8m1, TB_NO_REVERSE},
+ {X86::RCL8rCL, X86::RCL8mCL, TB_NO_REVERSE},
+ {X86::RCL8ri, X86::RCL8mi, TB_NO_REVERSE},
+ {X86::RCR16r1, X86::RCR16m1, TB_NO_REVERSE},
+ {X86::RCR16rCL, X86::RCR16mCL, TB_NO_REVERSE},
+ {X86::RCR16ri, X86::RCR16mi, TB_NO_REVERSE},
+ {X86::RCR32r1, X86::RCR32m1, TB_NO_REVERSE},
+ {X86::RCR32rCL, X86::RCR32mCL, TB_NO_REVERSE},
+ {X86::RCR32ri, X86::RCR32mi, TB_NO_REVERSE},
+ {X86::RCR64r1, X86::RCR64m1, TB_NO_REVERSE},
+ {X86::RCR64rCL, X86::RCR64mCL, TB_NO_REVERSE},
+ {X86::RCR64ri, X86::RCR64mi, TB_NO_REVERSE},
+ {X86::RCR8r1, X86::RCR8m1, TB_NO_REVERSE},
+ {X86::RCR8rCL, X86::RCR8mCL, TB_NO_REVERSE},
+ {X86::RCR8ri, X86::RCR8mi, TB_NO_REVERSE},
+ {X86::ROL16r1, X86::ROL16m1, TB_NO_REVERSE},
+ {X86::ROL16rCL, X86::ROL16mCL, TB_NO_REVERSE},
+ {X86::ROL16ri, X86::ROL16mi, TB_NO_REVERSE},
+ {X86::ROL32r1, X86::ROL32m1, TB_NO_REVERSE},
+ {X86::ROL32rCL, X86::ROL32mCL, TB_NO_REVERSE},
+ {X86::ROL32ri, X86::ROL32mi, TB_NO_REVERSE},
+ {X86::ROL64r1, X86::ROL64m1, TB_NO_REVERSE},
+ {X86::ROL64rCL, X86::ROL64mCL, TB_NO_REVERSE},
+ {X86::ROL64ri, X86::ROL64mi, TB_NO_REVERSE},
+ {X86::ROL8r1, X86::ROL8m1, TB_NO_REVERSE},
+ {X86::ROL8rCL, X86::ROL8mCL, TB_NO_REVERSE},
+ {X86::ROL8ri, X86::ROL8mi, TB_NO_REVERSE},
+ {X86::ROR16r1, X86::ROR16m1, TB_NO_REVERSE},
+ {X86::ROR16rCL, X86::ROR16mCL, TB_NO_REVERSE},
+ {X86::ROR16ri, X86::ROR16mi, TB_NO_REVERSE},
+ {X86::ROR32r1, X86::ROR32m1, TB_NO_REVERSE},
+ {X86::ROR32rCL, X86::ROR32mCL, TB_NO_REVERSE},
+ {X86::ROR32ri, X86::ROR32mi, TB_NO_REVERSE},
+ {X86::ROR64r1, X86::ROR64m1, TB_NO_REVERSE},
+ {X86::ROR64rCL, X86::ROR64mCL, TB_NO_REVERSE},
+ {X86::ROR64ri, X86::ROR64mi, TB_NO_REVERSE},
+ {X86::ROR8r1, X86::ROR8m1, TB_NO_REVERSE},
+ {X86::ROR8rCL, X86::ROR8mCL, TB_NO_REVERSE},
+ {X86::ROR8ri, X86::ROR8mi, TB_NO_REVERSE},
+ {X86::SAR16r1, X86::SAR16m1, TB_NO_REVERSE},
+ {X86::SAR16rCL, X86::SAR16mCL, TB_NO_REVERSE},
+ {X86::SAR16ri, X86::SAR16mi, TB_NO_REVERSE},
+ {X86::SAR32r1, X86::SAR32m1, TB_NO_REVERSE},
+ {X86::SAR32rCL, X86::SAR32mCL, TB_NO_REVERSE},
+ {X86::SAR32ri, X86::SAR32mi, TB_NO_REVERSE},
+ {X86::SAR64r1, X86::SAR64m1, TB_NO_REVERSE},
+ {X86::SAR64rCL, X86::SAR64mCL, TB_NO_REVERSE},
+ {X86::SAR64ri, X86::SAR64mi, TB_NO_REVERSE},
+ {X86::SAR8r1, X86::SAR8m1, TB_NO_REVERSE},
+ {X86::SAR8rCL, X86::SAR8mCL, TB_NO_REVERSE},
+ {X86::SAR8ri, X86::SAR8mi, TB_NO_REVERSE},
+ {X86::SBB16ri, X86::SBB16mi, TB_NO_REVERSE},
+ {X86::SBB16ri8, X86::SBB16mi8, TB_NO_REVERSE},
+ {X86::SBB16rr, X86::SBB16mr, TB_NO_REVERSE},
+ {X86::SBB32ri, X86::SBB32mi, TB_NO_REVERSE},
+ {X86::SBB32ri8, X86::SBB32mi8, TB_NO_REVERSE},
+ {X86::SBB32rr, X86::SBB32mr, TB_NO_REVERSE},
+ {X86::SBB64ri32, X86::SBB64mi32, TB_NO_REVERSE},
+ {X86::SBB64ri8, X86::SBB64mi8, TB_NO_REVERSE},
+ {X86::SBB64rr, X86::SBB64mr, TB_NO_REVERSE},
+ {X86::SBB8ri, X86::SBB8mi, TB_NO_REVERSE},
+ {X86::SBB8ri8, X86::SBB8mi8, TB_NO_REVERSE},
+ {X86::SBB8rr, X86::SBB8mr, TB_NO_REVERSE},
+ {X86::SHL16r1, X86::SHL16m1, TB_NO_REVERSE},
+ {X86::SHL16rCL, X86::SHL16mCL, TB_NO_REVERSE},
+ {X86::SHL16ri, X86::SHL16mi, TB_NO_REVERSE},
+ {X86::SHL32r1, X86::SHL32m1, TB_NO_REVERSE},
+ {X86::SHL32rCL, X86::SHL32mCL, TB_NO_REVERSE},
+ {X86::SHL32ri, X86::SHL32mi, TB_NO_REVERSE},
+ {X86::SHL64r1, X86::SHL64m1, TB_NO_REVERSE},
+ {X86::SHL64rCL, X86::SHL64mCL, TB_NO_REVERSE},
+ {X86::SHL64ri, X86::SHL64mi, TB_NO_REVERSE},
+ {X86::SHL8r1, X86::SHL8m1, TB_NO_REVERSE},
+ {X86::SHL8rCL, X86::SHL8mCL, TB_NO_REVERSE},
+ {X86::SHL8ri, X86::SHL8mi, TB_NO_REVERSE},
+ {X86::SHLD16rrCL, X86::SHLD16mrCL, TB_NO_REVERSE},
+ {X86::SHLD16rri8, X86::SHLD16mri8, TB_NO_REVERSE},
+ {X86::SHLD32rrCL, X86::SHLD32mrCL, TB_NO_REVERSE},
+ {X86::SHLD32rri8, X86::SHLD32mri8, TB_NO_REVERSE},
+ {X86::SHLD64rrCL, X86::SHLD64mrCL, TB_NO_REVERSE},
+ {X86::SHLD64rri8, X86::SHLD64mri8, TB_NO_REVERSE},
+ {X86::SHR16r1, X86::SHR16m1, TB_NO_REVERSE},
+ {X86::SHR16rCL, X86::SHR16mCL, TB_NO_REVERSE},
+ {X86::SHR16ri, X86::SHR16mi, TB_NO_REVERSE},
+ {X86::SHR32r1, X86::SHR32m1, TB_NO_REVERSE},
+ {X86::SHR32rCL, X86::SHR32mCL, TB_NO_REVERSE},
+ {X86::SHR32ri, X86::SHR32mi, TB_NO_REVERSE},
+ {X86::SHR64r1, X86::SHR64m1, TB_NO_REVERSE},
+ {X86::SHR64rCL, X86::SHR64mCL, TB_NO_REVERSE},
+ {X86::SHR64ri, X86::SHR64mi, TB_NO_REVERSE},
+ {X86::SHR8r1, X86::SHR8m1, TB_NO_REVERSE},
+ {X86::SHR8rCL, X86::SHR8mCL, TB_NO_REVERSE},
+ {X86::SHR8ri, X86::SHR8mi, TB_NO_REVERSE},
+ {X86::SHRD16rrCL, X86::SHRD16mrCL, TB_NO_REVERSE},
+ {X86::SHRD16rri8, X86::SHRD16mri8, TB_NO_REVERSE},
+ {X86::SHRD32rrCL, X86::SHRD32mrCL, TB_NO_REVERSE},
+ {X86::SHRD32rri8, X86::SHRD32mri8, TB_NO_REVERSE},
+ {X86::SHRD64rrCL, X86::SHRD64mrCL, TB_NO_REVERSE},
+ {X86::SHRD64rri8, X86::SHRD64mri8, TB_NO_REVERSE},
+ {X86::SUB16ri, X86::SUB16mi, TB_NO_REVERSE},
+ {X86::SUB16ri8, X86::SUB16mi8, TB_NO_REVERSE},
+ {X86::SUB16rr, X86::SUB16mr, TB_NO_REVERSE},
+ {X86::SUB32ri, X86::SUB32mi, TB_NO_REVERSE},
+ {X86::SUB32ri8, X86::SUB32mi8, TB_NO_REVERSE},
+ {X86::SUB32rr, X86::SUB32mr, TB_NO_REVERSE},
+ {X86::SUB64ri32, X86::SUB64mi32, TB_NO_REVERSE},
+ {X86::SUB64ri8, X86::SUB64mi8, TB_NO_REVERSE},
+ {X86::SUB64rr, X86::SUB64mr, TB_NO_REVERSE},
+ {X86::SUB8ri, X86::SUB8mi, TB_NO_REVERSE},
+ {X86::SUB8ri8, X86::SUB8mi8, TB_NO_REVERSE},
+ {X86::SUB8rr, X86::SUB8mr, TB_NO_REVERSE},
+ {X86::XOR16ri, X86::XOR16mi, TB_NO_REVERSE},
+ {X86::XOR16ri8, X86::XOR16mi8, TB_NO_REVERSE},
+ {X86::XOR16rr, X86::XOR16mr, TB_NO_REVERSE},
+ {X86::XOR32ri, X86::XOR32mi, TB_NO_REVERSE},
+ {X86::XOR32ri8, X86::XOR32mi8, TB_NO_REVERSE},
+ {X86::XOR32rr, X86::XOR32mr, TB_NO_REVERSE},
+ {X86::XOR64ri32, X86::XOR64mi32, TB_NO_REVERSE},
+ {X86::XOR64ri8, X86::XOR64mi8, TB_NO_REVERSE},
+ {X86::XOR64rr, X86::XOR64mr, TB_NO_REVERSE},
+ {X86::XOR8ri, X86::XOR8mi, TB_NO_REVERSE},
+ {X86::XOR8ri8, X86::XOR8mi8, TB_NO_REVERSE},
+ {X86::XOR8rr, X86::XOR8mr, TB_NO_REVERSE},
};
static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 89d93e4d3cbc48c..ff63a6565c09f32 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -433,7 +433,9 @@ void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
// the unfolded load size will be based on the register size. If that’s bigger
// than the memory operand size, the unfolded load will load more memory and
// potentially cause a memory fault.
- if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
+ // And X86 would not unfold RMW instrs.
+ if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec) ||
+ &Table == &Table2Addr)
Result.NoReverse = true;
// Check no-kz version's isMoveReg
The function unfoldMemoryOperand is called in the places below. All callers except 4 and 5 assume the unfolded result is exactly a load plus one operation (assert(NewNodes.size() == 2 && "Expected a load folding node!");), whereas unfolding an x86 DEC64m operation results in store, dec, load.
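A self-contained sketch of the shape mismatch described above; the Node type and unfold helper are stand-ins, not the real SelectionDAG API.

#include <cassert>
#include <string>
#include <vector>

using Node = std::string;  // stand-in for SDNode*; illustration only

// What unfolding would have to produce for the two kinds of entries.
std::vector<Node> unfold(bool IsRMW) {
  if (IsRMW)                          // e.g. X86::DEC64m
    return {"load", "dec", "store"};  // three nodes
  return {"load", "op"};              // ordinary fold-table entry: two nodes
}

int main() {
  auto NewNodes = unfold(/*IsRMW=*/false);
  // Most callers of unfoldMemoryOperand bake in this assumption; an RMW
  // entry would violate it, hence TB_NO_REVERSE on every Table2Addr row.
  assert(NewNodes.size() == 2 && "Expected a load folding node!");
}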
if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
// And X86 would not unfold RMW instrs.
if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec) ||
    &Table == &Table2Addr)
Is that the best we can do? Just bailing based on which table we're emitting seems very coarse.
We could add it in the outer caller:
// Instructions which Read-Modify-Write should be added to Table2Addr.
if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S | TB_NO_REVERSE, 0, IsManual);
return;
}
Maybe this could be better?
That sounds better.
I found the S attribute works only for the manual table, so this doesn't work. I may need to change the logic in addEntryWithFlags.
It's not only about saving code size. When register pressure is very high, the RMW form also avoids clobbering a register.
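For intuition, here are the two lowerings of *p += 1; the assembly in the comments is illustrative (AT&T syntax, typical optimized output), not a guaranteed codegen result.

// The RMW form needs no scratch register; the unfolded form clobbers
// one (%eax here), which hurts when register pressure is high.
void bump(int *p) {
  *p += 1;
  // folded (RMW):   addl $1, (%rdi)
  // unfolded:       movl (%rdi), %eax
  //                 addl $1, %eax
  //                 movl %eax, (%rdi)
}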
@KanRobert Got it. Thank you for the explanation.
✅ With the latest revision this PR passed the C/C++ code formatter. |
LGTM
LGTM
Thanks for your reviews!
My mistake, it's a different PR. |
…m#67288) X86 doesn't want to unfold RMW instrs into 1 load + 1 op + 1 store, because RMW can save code size and benefit RA when register pressure is high. Analysis of all the call positions shows that the current code never unfolds RMW anyway.
After patch #67288 landed, unfoldMemoryOperand no longer returns NewMIs of size 3, so the removed line is dead code.