aboutsummaryrefslogtreecommitdiffstats
path: root/asm/creole.py
diff options
context:
space:
mode:
author: Peter McGoron 2023-02-20 18:45:21 +0000
committer: Peter McGoron 2023-02-20 18:45:21 +0000
commit: 3dd29b100483cb9eb34d96f6ed21827e86cd77a2 (patch)
tree: b07bf066bad51f43c0225da682631ddde9f4e5fb /asm/creole.py
parent: adjust python assembler to new API (diff)
add label parser that can deal with forward jumps; all tests pass
Diffstat (limited to 'asm/creole.py')
-rw-r--r-- asm/creole.py 152
1 files changed, 102 insertions, 50 deletions
diff --git a/asm/creole.py b/asm/creole.py
index 559d663..a614701 100644
--- a/asm/creole.py
+++ b/asm/creole.py
@@ -71,7 +71,7 @@ class Argument:
class StringArgument(Argument):
def __init__(self, *args, **kwargs):
- super().__init__(self, *args, **kwargs)
+ super().__init__(*args, **kwargs)
def __bytes__(self):
b = bytes()
for v in self.val:
@@ -80,9 +80,23 @@ class StringArgument(Argument):
class LabelArgument(Argument):
def __init__(self, *args, **kwargs):
- super().__init__(self, *args, **kwargs)
- def load_label(self, labels):
- self.val = labels[val]
+ super().__init__(*args, **kwargs)
+ def __call__(self):
+ return self.val
+
+class TypecheckException(Exception):
+ """ Exception thrown when an argument to an instruction is of the
+ incorrect type. """
+ def __init__(self, got, argtype, sarg, opcode, i):
+ self.argtype = argtype
+ self.sarg = sarg
+ self.opcode = opcode
+ self.got = got
+ self.i = i
+ self.message = f'''\
+opcode {self.opcode.name} has invalid value {self.sarg} ({self.got})
+at {self.i} (expected {self.argtype})\
+'''
class ArgType(Enum):
""" Class denoting the type of an argument to an instruction. """
@@ -135,27 +149,29 @@ class ArgType(Enum):
elif s[0] == 'd' and s[1:].isnumeric():
return Argument(ArgType.DAT, int(s[1:]))
elif s[0] == '.':
- return Argument(ArgType.LAB, s[1:])
+ return LabelArgument(ArgType.LAB, s[:])
else:
raise MalformedArgument(s)
- def typecheck(self, s):
+ def typecheck(self, s, opcode, i):
""" Parses the type of the string and returns it if it fits
the type of the enum value.
:param s: String argument representing an argument.
- :return: The Argument class containing the object, or None
- if the string does not fit the type of self. """
+ :param opcode: Opcode of the argument. Used for debugging.
+ :param i: Argument number. Used for debugging.
+ :return: The Argument class containing the object.
+ :raises TypecheckException: """
t = ArgType.gettype(s)
if self == ArgType.VAL:
if t.at == ArgType.REG or t.at == ArgType.IMM:
return t
else:
- return None
+ raise TypecheckException(t.at, self, s, opcode, i)
elif t.at == self:
return t
else:
- return None
+ raise TypecheckException(t.at, self, s, opcode, i)
class OpcodeException(Exception):
pass
@@ -168,20 +184,7 @@ class TypecheckLenException(Exception):
self.argtypelen = argtypelen
def __str__(self):
return f'''\
-arguments {self.insargs} to opcode {self.opcode} not of length {self.argtypelen}\
-'''
-class TypecheckException(Exception):
- """ Exception thrown when an argument to an instruction are of the
- incorrect type. """
- def __init__(self, argtype, sarg, i, opcode):
- self.argtype = argtype
- self.sarg = sarg
- self.i = i
- self.opcode = opcode
- def __str__(self):
- return f'''\
-opcode {self.opcode} has invalid value {self.sarg}
-(expected {self.argtype} in position {self.i})\
+arguments {self.insargs} to opcode {self.opcode.name} not of length {self.argtypelen}\
'''
class Instruction(Enum):
@@ -248,19 +251,14 @@ class Instruction(Enum):
:param sargs: List of arguments to the instruction
as strings.
:return: List of arguments (as Argument objects).
- :raises TypeCheckException:
:raises TypecheckLenException:
"""
rargs = []
if len(sargs) != len(self.argtypes):
- raise TypecheckLenException(self.opcode, sargs,
+ raise TypecheckLenException(self, sargs,
len(self.argtypes))
for i in range(0, len(sargs)):
- t = self.argtypes[i].typecheck(sargs[i])
- if t is None:
- raise TypecheckException(self.argtypes[i],
- sargs[i],
- i, self.opcode)
+ t = self.argtypes[i].typecheck(sargs[i], self, i)
rargs.append(t)
return rargs
@@ -280,10 +278,11 @@ class Instruction(Enum):
return Instruction[self.opcode].render(args)
def _render_default(self, args):
- b = bytes([self.opcode])
+ comps = [bytes([self.opcode])]
for a in args:
- b = b + a()
- return b + bytes([0])
+ comps.append(a())
+ comps.append(b'\x00')
+ return comps
encoding_types = {
# start mask B
@@ -296,6 +295,12 @@ encoding_types = {
# B : Total number of bits excluding high bits
}
+def pseudo_utf8_len(n):
+ for k in sorted(encoding_types):
+ if n <= encoding_types[k][0]:
+ return k
+ return None
+
class InvalidNumberException(Exception):
pass
class InvalidLengthException(Exception):
@@ -304,10 +309,7 @@ def encode_pseudo_utf8(n, high_bits, to):
if n < 0:
raise InvalidNumberException(n)
if to is None or to < 0:
- for k in sorted(encoding_types):
- if n <= encoding_types[k][0]:
- to = k
- break
+ to = pseudo_utf8_len(n)
if to is None:
raise InvalidNumberException(n)
if to > 8 or to < 0:
@@ -343,27 +345,26 @@ class Line:
raise RangeCheckException(a.at,
a.val,
reglen)
-
- def load_label(self, labels):
- for a in self.args:
- if a.at == ArgType.LAB:
- a.load_label(labels)
def __call__(self):
return self.ins.render(self.args)
class InstructionNotFoundException(Exception):
pass
class Program:
- def asm_push_line(self, ins, args):
+ def _asm_push_line(self, ins, args):
l = Line(ins, args)
l.check_line(self.lablen, self.reglen)
self.asm.append(l)
def parse_asm_line(self, line):
+ """ Parse and add a single assembly line to the program.
+ :param line: String containing the line.
+ :raises InstructionNotFoundException:
+ """
line = line.strip().split()
line[0] = line[0].casefold()
if line[0][0] == '.':
- self.asm.append(line[0][1:])
+ self.asm.append(line[0])
return None
try:
@@ -372,21 +373,72 @@ class Program:
raise InstructionNotFoundException(line[0])
args_w_type = ins.typecheck(line[1:])
- self.asm_push_line(ins, args_w_type)
+ self._asm_push_line(ins, args_w_type)
def parse_lines(self, lines):
+ """ Parse a list of lines. See parse_asm_line.
+ :param lines: List of assembly lines.
+ """
for l in lines:
self.parse_asm_line(l)
def __call__(self):
- b = bytes()
+ """ Generate bytecode. """
+
+ # Labels may jump forward in the program, which means
+ # multiple passes are required to properly calculate
+ # jump locations.
+ # This algorithm makes every jump destination the same
+ # width in each operation, and calculates the smallest
+ # width that will allow all labels to jump to any location
+ # in the program.
+ # The algorithm calculates the length of the program
+ # with all jump arguments given a length of 0. Each label
+ # is noted with its offset in the program (with all jump
+ # arguments given zero length) and the number of jump arguments
+ # that occur prior to the label.
+ # When the code is emitted, each label's final offset is its
+ # recorded offset plus (preceding jump arguments * label width).
+ # This method is not optimal, but will work well for small
+ # programs.
+
+ ins = []
+ curlen = 0
+
+ # This dictionary maps each label to a tuple (len, refs)
+ # that denotes that a label points to len + lablen*refs
+ # where lablen is a to-be-determined number.
labels = {}
+ labelrefs = 0
for line in self.asm:
if type(line) is str:
- labels[line] = len(b)
+ labels[line] = (curlen, labelrefs)
continue
- line.load_label(labels)
- b = b + line()
+
+ next_ins = line()
+ for v in next_ins:
+ if type(v) is str:
+ labelrefs += 1
+ else:
+ curlen += len(v)
+ ins.append(next_ins)
+
+ # Calculate a label length, such that the entire program
+ # can be contained in this length.
+ for i in encoding_types:
+ if curlen + labelrefs*i < encoding_types[i][0]:
+ lablen = i
+ break
+
+ # Emit bytecode.
+ b = bytes()
+ for line in ins:
+ for arg in line:
+ if type(arg) is str:
+ off = labels[arg][0] + labels[arg][1]*lablen
+ arg = encode_pseudo_utf8(off, 0, lablen)
+ b = b + arg
+ assert len(b) < encoding_types[lablen][0]
return b
def __init__(self, lablen=16, reglen=16):