-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[sanitizer][asan][msvc] Teach GetInstructionSize about many instructions that appear in MSVC generated code. #69490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ba4c1a9
b5b73ce
5f900ff
edfffaa
39b422d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -494,6 +494,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0x6A: // 6A XX = push XX | ||
return 2; | ||
|
||
// This instruction can be encoded with a 16-bit immediate but that is | ||
// incredibly unlikely. | ||
case 0x68: // 68 XX XX XX XX : push imm32 | ||
return 5; | ||
|
||
case 0xb8: // b8 XX XX XX XX : mov eax, XX XX XX XX | ||
case 0xB9: // b9 XX XX XX XX : mov ecx, XX XX XX XX | ||
return 5; | ||
|
@@ -532,6 +537,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0xC033: // 33 C0 : xor eax, eax | ||
case 0xC933: // 33 C9 : xor ecx, ecx | ||
case 0xD233: // 33 D2 : xor edx, edx | ||
case 0xDB84: // 84 DB : test bl,bl | ||
case 0xC984: // 84 C9 : test cl,cl | ||
case 0xD284: // 84 D2 : test dl,dl | ||
return 2; | ||
|
||
// Cannot overwrite control-instruction. Return 0 to indicate failure. | ||
|
@@ -540,6 +548,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
} | ||
|
||
switch (0x00FFFFFF & *(u32*)address) { | ||
case 0xF8E484: // 83 E4 F8 : and esp, 0xFFFFFFF8 | ||
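There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 0xF8E484 -> 0xF8E483: the case constant should end in 83 to match the instruction bytes 83 E4 F8 given in the comment. |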
||
case 0x64EC83: // 83 EC 64 : sub esp, 64h | ||
return 3; | ||
case 0x24A48D: // 8D A4 24 XX XX XX XX : lea esp, [esp + XX XX XX XX] | ||
return 7; | ||
} | ||
|
@@ -549,6 +560,21 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0xA1: // A1 XX XX XX XX XX XX XX XX : | ||
// movabs eax, dword ptr ds:[XXXXXXXX] | ||
return 9; | ||
case 0xF2: | ||
switch (*(u32 *)(address + 1)) { | ||
case 0x2444110f: // f2 0f 11 44 24 XX movsd mmword ptr [rsp + | ||
// XX], xmm0 | ||
case 0x244c110f: // f2 0f 11 4c 24 XX movsd QWORD PTR | ||
// [rsp+0x8],xmm1 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
case 0x2454110f: // f2 0f 11 54 24 XX movsd QWORD PTR | ||
// [rsp+0x8],xmm2 | ||
case 0x245c110f: // f2 0f 11 5c 24 XX movsd QWORD PTR | ||
// [rsp+0x8],xmm3 | ||
case 0x2464110f: // f2 0f 11 64 24 XX movsd QWORD PTR | ||
// [rsp+0x8],xmm4 | ||
return 6; | ||
} | ||
break; | ||
|
||
case 0x83: | ||
const u8 next_byte = *(u8*)(address + 1); | ||
|
@@ -573,53 +599,124 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0x5641: // push r14 | ||
case 0x5741: // push r15 | ||
case 0x9066: // Two-byte NOP | ||
case 0xc084: // test al, al | ||
case 0x018a: // mov al, byte ptr [rcx] | ||
case 0xC084: // test al, al | ||
case 0x018A: // mov al, byte ptr [rcx] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks like a no-op change. |
||
return 2; | ||
|
||
case 0x7E80: // 80 7E YY XX cmp BYTE PTR [rsi+YY], XX | ||
case 0x7D80: // 80 7D YY XX cmp BYTE PTR [rbp+YY], XX | ||
case 0x7A80: // 80 7A YY XX cmp BYTE PTR [rdx+YY], XX | ||
case 0x7880: // 80 78 YY XX cmp BYTE PTR [rax+YY], XX | ||
case 0x7B80: // 80 7B YY XX cmp BYTE PTR [rbx+YY], XX | ||
case 0x7980: // 80 79 YY XX cmp BYTE ptr [rcx+YY], XX | ||
return 4; | ||
|
||
case 0x058B: // 8B 05 XX XX XX XX : mov eax, dword ptr [XX XX XX XX] | ||
if (rel_offset) | ||
*rel_offset = 2; | ||
return 6; | ||
|
||
case 0x7E81: // 81 7E YY XX XX XX XX cmp DWORD PTR [rsi+YY], XX XX XX XX | ||
case 0x7D81: // 81 7D YY XX XX XX XX cmp DWORD PTR [rbp+YY], XX XX XX XX | ||
case 0x7A81: // 81 7A YY XX XX XX XX cmp DWORD PTR [rdx+YY], XX XX XX XX | ||
case 0x7881: // 81 78 YY XX XX XX XX cmp DWORD PTR [rax+YY], XX XX XX XX | ||
case 0x7B81: // 81 7B YY XX XX XX XX cmp DWORD PTR [rbx+YY], XX XX XX XX | ||
case 0x7981: // 81 79 YY XX XX XX XX cmp dword ptr [rcx+YY], XX XX XX XX | ||
return 7; | ||
} | ||
|
||
switch (0x00FFFFFF & *(u32*)address) { | ||
case 0xe58948: // 48 8b c4 : mov rbp, rsp | ||
case 0xc18b48: // 48 8b c1 : mov rax, rcx | ||
case 0xc48b48: // 48 8b c4 : mov rax, rsp | ||
case 0xd9f748: // 48 f7 d9 : neg rcx | ||
case 0xd12b48: // 48 2b d1 : sub rdx, rcx | ||
case 0x07c1f6: // f6 c1 07 : test cl, 0x7 | ||
case 0xc98548: // 48 85 C9 : test rcx, rcx | ||
case 0xd28548: // 48 85 d2 : test rdx, rdx | ||
case 0xc0854d: // 4d 85 c0 : test r8, r8 | ||
case 0xc2b60f: // 0f b6 c2 : movzx eax, dl | ||
case 0xc03345: // 45 33 c0 : xor r8d, r8d | ||
case 0xc93345: // 45 33 c9 : xor r9d, r9d | ||
case 0xdb3345: // 45 33 DB : xor r11d, r11d | ||
case 0xd98b4c: // 4c 8b d9 : mov r11, rcx | ||
case 0xd28b4c: // 4c 8b d2 : mov r10, rdx | ||
case 0xc98b4c: // 4C 8B C9 : mov r9, rcx | ||
case 0xc18b4c: // 4C 8B C1 : mov r8, rcx | ||
case 0xd2b60f: // 0f b6 d2 : movzx edx, dl | ||
case 0xca2b48: // 48 2b ca : sub rcx, rdx | ||
case 0x10b70f: // 0f b7 10 : movzx edx, WORD PTR [rax] | ||
case 0xc00b4d: // 3d 0b c0 : or r8, r8 | ||
case 0xc08b41: // 41 8b c0 : mov eax, r8d | ||
case 0xd18b48: // 48 8b d1 : mov rdx, rcx | ||
case 0xdc8b4c: // 4c 8b dc : mov r11, rsp | ||
case 0xd18b4c: // 4c 8b d1 : mov r10, rcx | ||
case 0xE0E483: // 83 E4 E0 : and esp, 0xFFFFFFE0 | ||
case 0x07c1f6: // f6 c1 07 : test cl, 0x7 | ||
case 0x10b70f: // 0f b7 10 : movzx edx, word ptr [rax] | ||
case 0xc00b4d: // 4d 0b c0 : or r8, r8 | ||
case 0xc03345: // 45 33 c0 : xor r8d, r8d | ||
case 0xc08548: // 48 85 c0 : test rax, rax | ||
case 0xc0854d: // 4d 85 c0 : test r8, r8 | ||
case 0xc08b41: // 41 8b c0 : mov eax, r8d | ||
case 0xc0ff48: // 48 ff c0 : inc rax | ||
case 0xc0ff49: // 49 ff c0 : inc r8 | ||
case 0xc18b41: // 41 8b c1 : mov eax, r9d | ||
case 0xc18b48: // 48 8b c1 : mov rax, rcx | ||
case 0xc18b4c: // 4c 8b c1 : mov r8, rcx | ||
case 0xc1ff48: // 48 ff c1 : inc rcx | ||
case 0xc1ff49: // 49 ff c1 : inc r9 | ||
case 0xc28b41: // 41 8b c2 : mov eax, r10d | ||
case 0xc2b60f: // 0f b6 c2 : movzx eax, dl | ||
case 0xc2ff48: // 48 ff c2 : inc rdx | ||
case 0xc2ff49: // 49 ff c2 : inc r10 | ||
case 0xc38b41: // 41 8b c3 : mov eax, r11d | ||
case 0xc3ff48: // 48 ff c3 : inc rbx | ||
case 0xc3ff49: // 49 ff c3 : inc r11 | ||
case 0xc48b41: // 41 8b c4 : mov eax, r12d | ||
case 0xc48b48: // 48 8b c4 : mov rax, rsp | ||
case 0xc4ff49: // 49 ff c4 : inc r12 | ||
case 0xc5ff49: // 49 ff c5 : inc r13 | ||
case 0xc6ff48: // 48 ff c6 : inc rsi | ||
case 0xc6ff49: // 49 ff c6 : inc r14 | ||
case 0xc7ff48: // 48 ff c7 : inc rdi | ||
case 0xc7ff49: // 49 ff c7 : inc r15 | ||
case 0xc93345: // 45 33 c9 : xor r9d, r9d | ||
case 0xc98548: // 48 85 c9 : test rcx, rcx | ||
case 0xc9854d: // 4d 85 c9 : test r9, r9 | ||
case 0xc98b4c: // 4c 8b c9 : mov r9, rcx | ||
case 0xca2b48: // 48 2b ca : sub rcx, rdx | ||
case 0xd12b48: // 48 2b d1 : sub rdx, rcx | ||
case 0xd18b48: // 48 8b d1 : mov rdx, rcx | ||
case 0xd18b4c: // 4c 8b d1 : mov r10, rcx | ||
case 0xd28548: // 48 85 d2 : test rdx, rdx | ||
case 0xd2854d: // 4d 85 d2 : test r10, r10 | ||
case 0xd28b4c: // 4c 8b d2 : mov r10, rdx | ||
case 0xd2b60f: // 0f b6 d2 : movzx edx, dl | ||
case 0xd98b4c: // 4c 8b d9 : mov r11, rcx | ||
case 0xd9f748: // 48 f7 d9 : neg rcx | ||
case 0xdb3345: // 45 33 db : xor r11d, r11d | ||
case 0xdb8548: // 48 85 db : test rbx, rbx | ||
case 0xdb854d: // 4d 85 db : test r11, r11 | ||
case 0xdc8b4c: // 4c 8b dc : mov r11, rsp | ||
case 0xe0e483: // 83 e4 e0 : and esp, 0xffffffe0 | ||
case 0xe48548: // 48 85 e4 : test rsp, rsp | ||
case 0xe4854d: // 4d 85 e4 : test r12, r12 | ||
case 0xe58948: // 48 89 c4 : mov rbp, rsp | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this comment was wrong both before and after the change; it should be 48 89 e5. |
||
case 0xed8548: // 48 85 ed : test rbp, rbp | ||
case 0xed854d: // 4d 85 ed : test r13, r13 | ||
case 0xf6854d: // 4d 85 f6 : test r14, r14 | ||
case 0xff854d: // 4d 85 ff : test r15, r15 | ||
return 3; | ||
|
||
case 0xec8348: // 48 83 ec XX : sub rsp, XX | ||
case 0xf88349: // 49 83 f8 XX : cmp r8, XX | ||
case 0x588948: // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx | ||
case 0x245489: // 89 54 24 XX : mov DWORD PTR[rsp + XX], edx | ||
case 0x428d44: // 44 8d 42 XX : lea r8d , [rdx + XX] | ||
case 0x588948: // 48 89 58 XX : mov QWORD PTR[rax + XX], rbx | ||
case 0xec8348: // 48 83 ec XX : sub rsp, XX | ||
case 0xf88349: // 49 83 f8 XX : cmp r8, XX | ||
return 4; | ||
|
||
case 0x246483: // 83 64 24 XX YY : and DWORD PTR [rsp+XX], YY | ||
return 5; | ||
|
||
case 0x788166: // 66 81 78 XX YY YY cmp WORD PTR [rax+XX], YY YY | ||
case 0x798166: // 66 81 79 XX YY YY cmp WORD PTR [rcx+XX], YY YY | ||
case 0x7a8166: // 66 81 7a XX YY YY cmp WORD PTR [rdx+XX], YY YY | ||
case 0x7b8166: // 66 81 7b XX YY YY cmp WORD PTR [rbx+XX], YY YY | ||
case 0x7e8166: // 66 81 7e XX YY YY cmp WORD PTR [rsi+XX], YY YY | ||
case 0x7f8166: // 66 81 7f XX YY YY cmp WORD PTR [rdi+XX], YY YY | ||
return 6; | ||
|
||
case 0xec8148: // 48 81 EC XX XX XX XX : sub rsp, XXXXXXXX | ||
return 7; | ||
|
||
// clang-format off | ||
case 0x788141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r8+YY], XX XX XX XX | ||
case 0x798141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r9+YY], XX XX XX XX | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks like the comment should be 41 81 79, and so on for the lines below. |
||
case 0x7a8141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r10+YY], XX XX XX XX | ||
case 0x7b8141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r11+YY], XX XX XX XX | ||
case 0x7c8141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r12+YY], XX XX XX XX | ||
case 0x7d8141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r13+YY], XX XX XX XX | ||
case 0x7e8141: // 41 81 78 XX YY YY YY YY : cmp DWORD PTR [r14+YY], XX XX XX XX | ||
case 0x7f8141: // 41 81 78 YY XX XX XX XX cmp DWORD P [r15+YY], XX XX XX XX | ||
case 0x247c81: // 81 7c 24 YY XX XX XX XX cmp DWORD P [rsp+YY], XX XX XX XX | ||
return 8; | ||
// clang-format on | ||
|
||
case 0x058b48: // 48 8b 05 XX XX XX XX : | ||
// mov rax, QWORD PTR [rip + XXXXXXXX] | ||
case 0x25ff48: // 48 ff 25 XX XX XX XX : | ||
|
@@ -645,8 +742,11 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0x24548948: // 48 89 54 24 XX : mov QWORD PTR [rsp + XX], rdx | ||
case 0x244c894c: // 4c 89 4c 24 XX : mov QWORD PTR [rsp + XX], r9 | ||
case 0x2444894c: // 4c 89 44 24 XX : mov QWORD PTR [rsp + XX], r8 | ||
case 0x244c8944: // 44 89 4c 24 XX mov DWORD PTR [rsp + XX], r9d | ||
case 0x24448944: // 44 89 44 24 XX mov DWORD PTR [rsp + XX], r8d | ||
case 0x246c8d48: // 48 8d 6c 24 XX : lea rbp, [rsp + XX] | ||
return 5; | ||
case 0x24648348: // 48 83 64 24 XX : and QWORD PTR [rsp + XX], YY | ||
case 0x24648348: // 48 83 64 24 XX YY : and QWORD PTR [rsp + XX], YY | ||
return 6; | ||
} | ||
|
||
|
@@ -660,6 +760,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) { | |
case 0x458B: // 8B 45 XX : mov eax, dword ptr [ebp + XX] | ||
case 0x5D8B: // 8B 5D XX : mov ebx, dword ptr [ebp + XX] | ||
case 0x7D8B: // 8B 7D XX : mov edi, dword ptr [ebp + XX] | ||
case 0x758B: // 8B 75 XX : mov esi, dword ptr [ebp + XX] | ||
case 0xEC83: // 83 EC XX : sub esp, XX | ||
case 0x75FF: // FF 75 XX : push dword ptr [ebp + XX] | ||
return 3; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Surely that's processor-mode dependent, right? I think we can safely say that our decodings are only valid for x86/x64.