-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathstatement.py
294 lines (252 loc) · 12 KB
/
statement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
"""
Copyright (C) 2013-2020 Craig Thomas
This project uses an MIT style license - see LICENSE for details.
A Color Computer Assembler - see the README.md file for details.
"""
# I M P O R T S ###############################################################
import re
from copy import copy
from cocoasm.exceptions import ParseError, TranslationError, OperandTypeError
from cocoasm.instruction import INSTRUCTIONS, CodePackage
from cocoasm.operands import Operand, BadInstructionOperand
from cocoasm.values import NumericValue
# C O N S T A N T S ###########################################################
# Matches a line that is empty or contains nothing but whitespace
BLANK_LINE_REGEX = re.compile(r"^\s*$")

# Matches a line that holds only a comment; the text following the leading
# semicolon (and any whitespace after it) is captured as `comment`
COMMENT_LINE_REGEX = re.compile(r"^\s*;\s*(?P<comment>.*)$")

# Matches a complete assembly line and captures its `label`, `mnemonic`,
# `operands` and `comment` fields. The pattern is written as adjacent raw
# string pieces (one per field) that Python joins into a single pattern.
ASM_LINE_REGEX = re.compile(
    r"^(?P<label>[\w@]*)\s+"
    r"(?P<mnemonic>\w*)\s+"
    r"(?P<operands>[\w\[\]><'\"@:,.#?$%^&*()=!+\-/]*)\s*"
    r";*(?P<comment>.*)$"
)

# Matches a direct value: a leading `<` with the remainder captured as `value`
DIR_REGEX = re.compile(r"^<(?P<value>.*)")
# C L A S S E S ##############################################################
class Statement(object):
    """
    The Statement class represents a single line of assembly language. Each
    statement is constructed from a single line that has the following format:

        LABEL   MNEMONIC   OPERANDS   COMMENT

    The statement can be parsed and translated to its Color Computer machine
    code equivalent.
    """

    # Statements are mutable and define __eq__, so they stay unhashable. This
    # only makes explicit what Python already does for a class with __eq__.
    __hash__ = None

    def __init__(self, line):
        """
        Create a statement by parsing a single line of assembly source.

        :param line: the line of text to parse
        :raises ParseError: if the line cannot be parsed
        """
        # True until parse_line finds a comment or an instruction on the line
        self.is_empty = True
        # True when the line holds nothing but a comment
        self.is_comment_only = False
        # Entry from INSTRUCTIONS matching the mnemonic, or None
        self.instruction = None
        self.label = ""
        # Working operand; replaced by resolve_symbols
        self.operand = None
        # Copy of the operand as parsed; used for display in __str__
        self.original_operand = None
        self.comment = None
        # Upper-cased mnemonic taken from the line
        self.mnemonic = ""
        # Not modified anywhere in this class
        self.state = None
        # False while the translated code still has an unresolved size
        self.fixed_size = True
        # Size hint handed to NumericValue for program counter relative offsets
        self.pcr_size_hint = 2
        self.code_pkg = CodePackage()
        self.parse_line(line)

    def __str__(self):
        """
        Build a single listing line holding the address, the assembled bytes,
        the label, the mnemonic, the operand as originally written, and the
        comment.

        :return: the formatted listing line
        """
        op_code_string = "".join((
            self.code_pkg.op_code.hex(),
            self.code_pkg.post_byte.hex(),
            self.code_pkg.additional.hex(),
        ))
        # Blank and comment-only lines never get an operand, and blank lines
        # never get a comment, so fall back to empty text instead of failing.
        operand_string = self.original_operand.operand_string if self.original_operand else ""
        comment = self.comment or ""
        return "${} {:.10} {} {} {} ; {}".format(
            self.code_pkg.address.hex(size=4),
            op_code_string.ljust(10, ' '),
            self.label.rjust(10, ' '),
            self.mnemonic.rjust(5, ' '),
            operand_string.ljust(30, ' '),
            comment.ljust(40, ' '),
        )

    def __eq__(self, other):
        """
        Compare two statements on their parsed fields. The operands and the
        code package are not part of the comparison.

        :param other: the object to compare against
        :return: True if the compared fields match, NotImplemented if other
                 is not a Statement
        """
        if not isinstance(other, Statement):
            return NotImplemented
        return (
            self.is_empty == other.is_empty
            and self.is_comment_only == other.is_comment_only
            and self.instruction == other.instruction
            and self.label == other.label
            and self.comment == other.comment
            and self.mnemonic == other.mnemonic
            and self.state == other.state
            and self.fixed_size == other.fixed_size
            and self.pcr_size_hint == other.pcr_size_hint
        )

    def get_include_filename(self):
        """
        Returns the name of the file to include in the current stream of
        statements if the statement is the pseudo op INCLUDE, and there is
        a value for the operand.

        :return: the name of the file to include, or None if this statement
                 is not an include
        """
        # Blank and comment-only lines have neither an instruction nor an operand
        if self.instruction is None or self.operand is None:
            return None
        return self.operand.operand_string if self.instruction.is_include else None

    def parse_line(self, line):
        """
        Parse a line of assembly language text. Blank lines leave the
        statement empty, comment-only lines record just the comment, and
        anything else must be a full assembly line.

        :param line: the line of text to parse
        :raises ParseError: if the line does not look like assembly, the
                            mnemonic is unknown, or the operand is invalid
        """
        if BLANK_LINE_REGEX.search(line):
            return

        data = COMMENT_LINE_REGEX.match(line)
        if data:
            self.is_empty = False
            self.is_comment_only = True
            self.comment = data.group("comment").strip()
            return

        data = ASM_LINE_REGEX.match(line)
        if not data:
            raise ParseError("Could not parse line", line)

        self.label = data.group("label") or ""
        self.mnemonic = data.group("mnemonic").upper() or ""
        self.instruction = next((op for op in INSTRUCTIONS if op.mnemonic == self.mnemonic), None)
        self.original_operand = copy(self.operand)

        if not self.instruction:
            # Keep the raw operand and comment so the bad line can still be shown
            self.original_operand = BadInstructionOperand(data.group("operands"), self.instruction)
            self.comment = data.group("comment")
            raise ParseError("[{}] invalid mnemonic".format(self.mnemonic), line)

        operand_text = data.group("operands")
        comment_text = data.group("comment").strip()

        if self.instruction.is_string_define:
            # A delimited string may contain spaces, in which case the line
            # pattern puts its tail in the comment group; glue the two pieces
            # back together before looking for the closing delimiter.
            # NOTE(review): the pieces are re-joined with exactly one space,
            # so the original spacing inside the string is not preserved.
            string_text = operand_text
            if data.group("comment"):
                string_text = "{} {}".format(operand_text, comment_text)
            if not string_text:
                raise ParseError("[{}] requires a delimited string".format(self.mnemonic), line)
            delimiter = string_text[0]
            closing = string_text.find(delimiter, 1)
            if closing == -1:
                raise ParseError(
                    "[{}] string is missing its closing delimiter".format(self.mnemonic), line
                )
            operand_text = string_text[:closing + 1].strip()
            # Skip the closing delimiter and the space that follows it
            comment_text = string_text[closing + 2:].strip()

        try:
            self.operand = Operand.create_from_str(operand_text, self.instruction)
        except OperandTypeError as error:
            raise ParseError(str(error), line) from error

        self.original_operand = copy(self.operand)
        self.comment = comment_text
        self.is_empty = False

    def set_address(self, address):
        """
        This function sets the address where this statement should be located
        in memory. If the address is not already set, it will set the address
        and return the address that was set. If the address was already set
        (for example, in an ORG operation), it will return that address
        instead.

        :param address: the address to set for the statement
        :return: the address that was set or returned
        """
        if self.code_pkg.address.is_none():
            self.code_pkg.address = NumericValue(address)
        return self.code_pkg.address.int

    def resolve_symbols(self, symbol_table):
        """
        Resolve any symbols within operands, and check to make sure operand types
        are valid.

        :param symbol_table: the symbol table handed to the operand for resolution
        :raises TranslationError: if the operand fails to resolve for any reason
        """
        try:
            self.operand = self.operand.resolve_symbols(symbol_table)
        except Exception as error:
            # Any failure is reported against this statement
            raise TranslationError(str(error), self) from error

    def translate(self):
        """
        Translate the mnemonic into an actual operation. The statement is no
        longer fixed size if the resulting code package still needs its
        additional bytes resolved or has more than one post byte choice.

        :raises TranslationError: if the operand fails to translate for any reason
        """
        try:
            self.code_pkg = self.operand.translate()
            self.fixed_size = not (self.code_pkg.additional_needs_resolution or self.code_pkg.post_byte_choices)
        except Exception as error:
            # Any failure is reported against this statement
            raise TranslationError(str(error), self) from error

    def determine_pcr_relative_sizes(self, statements, this_index):
        """
        Given a PCR relative operation, determine whether we have an 8-bit or 16-bit offset
        from the program counter. Mark the correct size for the statement when complete,
        so that other program counter relative checks can complete. If the smallest and
        largest possible distances disagree about the size, the statement is left
        untouched.

        :param statements: the full set of statements that make up the program
        :param this_index: the index that this instruction occurs at
        """
        # TODO: implement detection of 5-bit offsets as an optimization
        rel_index = self.code_pkg.additional.int
        if self.operand.left.is_address_expression():
            rel_index = self.operand.left.extract_address_index_from_expression()

        positive_range = rel_index >= this_index
        if positive_range:
            span = range(this_index, rel_index)
        else:
            span = range(rel_index, this_index)

        # Smallest and largest possible byte distance to the target.
        # NOTE(review): the extra 2 presumably covers this instruction's own
        # bytes - confirm.
        max_size = 2 + sum(statements[x].code_pkg.max_size for x in span)
        min_size = 2 + sum(statements[x].code_pkg.size for x in span)

        raw_post_byte = self.code_pkg.post_byte.int

        # A signed 8-bit offset reaches 127 bytes forward and 128 bytes backward
        limit = 127 if positive_range else 128
        if min_size <= limit and max_size <= limit:
            extra_bytes, size_hint, choice = 1, 2, 0
        elif min_size > limit and max_size > limit:
            extra_bytes, size_hint, choice = 2, 4, 1
        else:
            # The two estimates straddle the limit; nothing can be decided yet
            return

        self.code_pkg.size += extra_bytes
        self.code_pkg.max_size = self.code_pkg.size
        self.pcr_size_hint = size_hint
        self.fixed_size = True
        raw_post_byte |= self.code_pkg.post_byte_choices[choice]
        self.code_pkg.post_byte = NumericValue(raw_post_byte)

    def fix_addresses(self, statements, this_index):
        """
        Once all of the statements have been translated, all of the addresses
        must be 'fixed'. In particular, branch operations need to know how
        many statements they need to skip ahead or behind, and the address
        at the target statement. This function calculates what the target
        of a branch, jump or subroutine call needs to go to, and inserts
        it in the code package for the assembled instruction.

        :param statements: the full set of statements that make up the program
        :param this_index: the index that this instruction occurs at
        """
        if self.operand.is_relative():
            short_branch = self.instruction.is_short_branch
            base_value = 0x101 if short_branch else 0x10001
            size_hint = 2 if short_branch else 4
            branch_index = self.code_pkg.additional.int
            if branch_index < this_index:
                # Backward branch: count the bytes from the target up to and
                # including this statement. base_value - (1 + n) equals
                # 0x100 - n (or 0x10000 - n), the two's complement form of -n.
                length = 1 + sum(
                    statement.code_pkg.size for statement in statements[branch_index:this_index + 1]
                )
                self.code_pkg.additional = NumericValue(base_value - length, size_hint=size_hint)
            else:
                # Forward branch: count the bytes strictly between this
                # statement and the target
                length = sum(
                    statement.code_pkg.size for statement in statements[this_index + 1:branch_index]
                )
                self.code_pkg.additional = NumericValue(length, size_hint=size_hint)
            return

        if self.operand.value.is_address_expression():
            self.code_pkg.additional = self.operand.value.calculate_address_offset(statements)

        if self.operand.value.is_address():
            # The operand value holds a statement index; swap in that statement's address
            self.code_pkg.additional = statements[self.operand.value.int].code_pkg.address

        if self.code_pkg.additional_needs_resolution:
            if self.operand.is_indexed() and self.operand.left and self.operand.left.is_address_expression():
                relative_address = self.operand.left.calculate_address_offset(statements).int
            else:
                relative_address = statements[self.code_pkg.additional.int].code_pkg.address.int
            start_address = statements[this_index].code_pkg.address.int
            # The offset is measured from the end of this instruction
            jump_amount = relative_address - start_address - self.code_pkg.size
            self.code_pkg.additional = NumericValue(jump_amount, size_hint=self.pcr_size_hint)
# E N D O F F I L E #######################################################