-
Notifications
You must be signed in to change notification settings - Fork 4
/
assembly_bot.py
143 lines (115 loc) · 4.28 KB
/
assembly_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3.7
# -*- coding: utf-8 -*-
import binascii
import html
import re
from aiogram.types import Message
from capstone import CS_ARCH_X86, CS_MODE_32, CS_MODE_64, Cs
from keystone import KS_ARCH_X86, KS_MODE_32, KS_MODE_64, Ks
class BotException(Exception):
    """Common ancestor of every error the bot itself raises."""
class AssemblyBot:
    """Chat handler that assembles and disassembles x86/x64 instructions.

    An incoming message is either a command (``/help`` or ``/about``) or a
    query. A query may start with an ``(x86)`` or ``(x64)`` prefix and is
    followed either by an even-length run of hex digits, which is
    disassembled with Capstone, or by assembly text, which is assembled with
    Keystone. Every reply is sent as Telegram HTML.
    """

    # "/help" or "/about", case-insensitive, optionally preceded by
    # whitespace; anything following the command name is ignored.
    _COMMAND_REGEX = re.compile(
        r"^\s*/(?P<command>help|about).*$", re.DOTALL | re.IGNORECASE
    )

    # Optional "(arch)" prefix, then either nothing but hex byte pairs
    # ("bytes") or any text starting with a non-blank character ("assembly").
    # The hex alternative is tried first, so input made up solely of hex
    # pairs is always treated as machine code.
    _MESSAGE_REGEX = re.compile(
        r"^(?:\s*\(\s*(?P<arch>x86|x64)\s*\)\s*)?"
        r"(?:(?P<bytes>(?:[0-9a-f]{2})+)|"
        r"(?P<assembly>\S.*))$",
        re.DOTALL | re.IGNORECASE,
    )

    # NOTE: the multi-line example is tagged (x86) because it uses
    # "pop eax", which has no encoding in 64-bit mode; tagged (x64) the
    # example would be rejected by the assembler.
    _USAGE_TEXT = """I can assemble and disassemble various instructions.
To assemble, send me a message in the following format:
<pre> (arch) instruction1; instruction2</pre>
You can also separate instructions with newlines.
For example:
<pre> (x86) xor eax, eax</pre>
Or:
<pre> (x86)
begin:
call get_eip
get_eip:
pop eax
sub eax, get_eip - begin
ret</pre>
To disassemble, send me a message in the following format:
<pre> (arch) hex text</pre>
For example:
<pre> (x64) c3</pre>
Currently, the supported architectures are:
- x86
- x64
If the architecture is omitted, x86 is assumed.
"""

    _ABOUT_TEXT = (
        "I'm AssemblyBot. My code is available on "
        '<a href="https://github.com/mbikovitsky/AssemblyBot">GitHub</a>.'
    )

    async def on_chat_message(self, message: "Message") -> None:
        """Answer one incoming chat message.

        Commands are answered with the matching canned text; anything else is
        treated as an assemble/disassemble query. Any exception raised while
        processing (including one raised while sending the reply) is reported
        back to the chat as an HTML-escaped ``ERROR: ...`` message.

        Args:
            message: The incoming message; only ``text`` and ``answer`` are
                used.
        """
        try:
            if not message.text:
                raise BotException("Message content not understood.")

            if self._is_command(message.text):
                result = self._process_command_text(message.text)
            else:
                result = self._process_query_text(message.text)

            await message.answer(result, parse_mode="HTML")
        except Exception as exception:  # pylint: disable=broad-except
            # Top-level boundary: the user should always get some reply.
            await message.answer(
                self._format_as_html(f"ERROR: {exception}"), parse_mode="HTML"
            )

    @staticmethod
    def _format_as_html(text):
        """Return ``text`` HTML-escaped and wrapped in a ``<pre>`` element."""
        return f"<pre>{html.escape(text)}</pre>"

    @classmethod
    def _is_command(cls, text):
        """Return True when ``text`` is a recognized ``/command`` message."""
        return bool(cls._COMMAND_REGEX.fullmatch(text))

    @classmethod
    def _process_command_text(cls, text):
        """Return the HTML reply for a command message.

        Args:
            text: The full message text.

        Returns:
            The usage text for ``/help`` or the about text for ``/about``.

        Raises:
            BotException: If ``text`` is not a recognized command.
        """
        match = cls._COMMAND_REGEX.fullmatch(text)
        if not match:
            raise BotException("Unrecognized command.")

        command = match.group("command").lower()
        if command == "help":
            return cls._USAGE_TEXT
        if command == "about":
            return cls._ABOUT_TEXT
        raise BotException("Unrecognized command.")

    @classmethod
    def _process_query_text(cls, text):
        """Assemble or disassemble a query and return the reply as HTML.

        Args:
            text: The full message text, optionally prefixed with ``(arch)``.

        Returns:
            The result wrapped by ``_format_as_html``.

        Raises:
            BotException: If the text does not match the query syntax.
        """
        match = cls._MESSAGE_REGEX.fullmatch(text)
        if not match:
            raise BotException("Syntax error.")

        if match.group("bytes"):
            result = cls._process_bytes(match.group("arch"), match.group("bytes"))
        elif match.group("assembly"):
            result = cls._process_assembly(match.group("arch"), match.group("assembly"))
        else:
            raise BotException("Not supported.")

        return cls._format_as_html(result)

    @staticmethod
    def _format_disassembly(instructions, total_size):
        """Render disassembled instructions as one text line each.

        Args:
            instructions: Iterable of ``(address, size, mnemonic, op_str)``
                tuples, with addresses counted from offset 0 of the input.
            total_size: Number of bytes that were handed to the disassembler.

        Returns:
            Newline-joined ``0x<address>:\\t<mnemonic>\\t<op_str>`` lines. If
            the instructions cover fewer than ``total_size`` bytes, a final
            line marks the offset at which decoding stopped, so truncated
            output is never mistaken for a complete listing.
        """
        lines = []
        consumed = 0
        for address, size, mnemonic, op_str in instructions:
            lines.append(f"0x{address:x}:\t{mnemonic}\t{op_str}")
            consumed = address + size

        if consumed < total_size:
            lines.append(f"0x{consumed:x}:\t(invalid or incomplete instruction)")

        return "\n".join(lines)

    @staticmethod
    def _process_bytes(architecture, raw_bytes):
        """Disassemble a hex string.

        Args:
            architecture: ``"x86"``, ``"x64"`` (any case) or a falsy value,
                which selects x86.
            raw_bytes: Even-length string of hex digits.

        Returns:
            The listing produced by ``_format_disassembly``.

        Raises:
            BotException: If the architecture is not supported.
        """
        architecture = architecture.lower() if architecture else "x86"

        if architecture == "x86":
            disassembler = Cs(CS_ARCH_X86, CS_MODE_32)
        elif architecture == "x64":
            disassembler = Cs(CS_ARCH_X86, CS_MODE_64)
        else:
            raise BotException("Unsupported architecture.")

        binary = binascii.unhexlify(raw_bytes)

        # The disassembler stops at the first byte sequence it cannot decode,
        # so the formatter is told the input length to flag any leftover.
        return AssemblyBot._format_disassembly(
            disassembler.disasm_lite(binary, 0), len(binary)
        )

    @staticmethod
    def _process_assembly(architecture, text):
        """Assemble source text into a lowercase hex string.

        Args:
            architecture: ``"x86"``, ``"x64"`` (any case) or a falsy value,
                which selects x86.
            text: Assembly source; statements may be separated by ``;`` or
                newlines.

        Returns:
            The machine code as hex digits.

        Raises:
            BotException: If the architecture is not supported, or if the
                assembler produced no machine code for the input.

        Errors raised by the assembler itself propagate unchanged.
        """
        architecture = architecture.lower() if architecture else "x86"

        if architecture == "x86":
            assembler = Ks(KS_ARCH_X86, KS_MODE_32)
        elif architecture == "x64":
            assembler = Ks(KS_ARCH_X86, KS_MODE_64)
        else:
            raise BotException("Unsupported architecture.")

        encoding, _statement_count = assembler.asm(text.encode("UTF-8"))

        # The encoding is None (or empty) when the input contained nothing
        # that emits bytes; bytes(None) would otherwise surface as a
        # confusing TypeError message in the chat.
        if not encoding:
            raise BotException("No machine code was produced.")

        return binascii.hexlify(bytes(encoding)).decode("UTF-8")