add label parser that can deal with forward jumps; all tests pass

This commit is contained in:
Peter McGoron 2023-02-20 18:45:21 +00:00
parent 48827c5b74
commit 3dd29b1004
2 changed files with 138 additions and 112 deletions

View File

@ -71,7 +71,7 @@ class Argument:
class StringArgument(Argument):
def __init__(self, *args, **kwargs):
super().__init__(self, *args, **kwargs)
super().__init__(*args, **kwargs)
def __bytes__(self):
b = bytes()
for v in self.val:
@ -80,9 +80,23 @@ class StringArgument(Argument):
class LabelArgument(Argument):
    """ Argument that refers to a label.

    Calling the instance returns the stored value unchanged, so
    the caller decides how the label is encoded. """
    def __init__(self, *args, **kwargs):
        # Forward the arguments as given. Passing ``self`` explicitly
        # through super() would shift every positional argument by one.
        super().__init__(*args, **kwargs)

    def load_label(self, labels):
        """ Replace the stored value with its entry in ``labels``.

        :param labels: Mapping indexed by the currently stored value.
        :raises KeyError: If the stored value is not a key of ``labels``.
        """
        self.val = labels[self.val]

    def __call__(self):
        """ Return the stored value. """
        return self.val
class TypecheckException(Exception):
    """ Exception thrown when an argument to an instruction is of the
    incorrect type.

    :param got: Type that was parsed from the argument.
    :param argtype: Type that was expected.
    :param sarg: The argument as a string.
    :param opcode: Instruction being checked. Its ``name`` attribute
      is used in the message.
    :param i: Argument number.
    """
    def __init__(self, got, argtype, sarg, opcode, i):
        self.argtype = argtype
        self.sarg = sarg
        self.opcode = opcode
        self.got = got
        self.i = i
        self.message = (
            f'opcode {self.opcode.name} has invalid value '
            f'{self.sarg} ({self.got})\n'
            f'at {self.i} (expected {self.argtype})'
        )

    def __str__(self):
        # Without this, str() falls back to the tuple of constructor
        # arguments and the formatted message is never shown.
        return self.message
class ArgType(Enum):
""" Class denoting the type of an argument to an instruction. """
@ -135,27 +149,29 @@ class ArgType(Enum):
elif s[0] == 'd' and s[1:].isnumeric():
return Argument(ArgType.DAT, int(s[1:]))
elif s[0] == '.':
return Argument(ArgType.LAB, s[1:])
return LabelArgument(ArgType.LAB, s[:])
else:
raise MalformedArgument(s)
def typecheck(self, s, opcode, i):
    """ Parses the type of the string and returns it if it fits
    the type of the enum value.

    :param s: String argument representing an argument.
    :param opcode: Opcode of the argument. Used for debugging.
    :param i: Argument number. Used for debugging.
    :return: The Argument class containing the object.
    :raises TypecheckException: If the parsed type does not fit
      the type of self.
    """
    t = ArgType.gettype(s)
    # VAL is satisfied by either a register or an immediate; every
    # other type must match exactly.
    if self == ArgType.VAL:
        fits = t.at == ArgType.REG or t.at == ArgType.IMM
    else:
        fits = t.at == self
    if not fits:
        raise TypecheckException(t.at, self, s, opcode, i)
    return t
class OpcodeException(Exception):
pass
@ -168,20 +184,7 @@ class TypecheckLenException(Exception):
self.argtypelen = argtypelen
def __str__(self):
return f'''\
arguments {self.insargs} to opcode {self.opcode} not of length {self.argtypelen}\
'''
class TypecheckException(Exception):
""" Exception thrown when an argument to an instruction are of the
incorrect type. """
def __init__(self, argtype, sarg, i, opcode):
self.argtype = argtype
self.sarg = sarg
self.i = i
self.opcode = opcode
def __str__(self):
return f'''\
opcode {self.opcode} has invalid value {self.sarg}
(expected {self.argtype} in position {self.i})\
arguments {self.insargs} to opcode {self.opcode.name} not of length {self.argtypelen}\
'''
class Instruction(Enum):
@ -248,19 +251,14 @@ class Instruction(Enum):
:param sargs: List of arguments to the instruction
as strings.
:return: List of arguments (as Argument objects).
:raises TypeCheckException:
:raises TypecheckLenException:
"""
rargs = []
if len(sargs) != len(self.argtypes):
raise TypecheckLenException(self.opcode, sargs,
raise TypecheckLenException(self, sargs,
len(self.argtypes))
for i in range(0, len(sargs)):
t = self.argtypes[i].typecheck(sargs[i])
if t is None:
raise TypecheckException(self.argtypes[i],
sargs[i],
i, self.opcode)
t = self.argtypes[i].typecheck(sargs[i], self, i)
rargs.append(t)
return rargs
@ -280,10 +278,11 @@ class Instruction(Enum):
return Instruction[self.opcode].render(args)
def _render_default(self, args):
    """ Render the instruction as a list of components.

    Components are kept separate rather than joined so that a
    caller can still tell an unresolved (non-bytes) component
    apart from already-encoded bytes.

    :param args: Callables; each is called once, in order, and its
      result becomes one component.
    :return: List holding the opcode byte, one component per
      argument, and a terminating zero byte.
    """
    comps = [bytes([self.opcode])]
    comps.extend(a() for a in args)
    comps.append(b'\x00')
    return comps
encoding_types = {
# start mask B
@ -296,6 +295,12 @@ encoding_types = {
# B : Total number of bits excluding high bits
}
def pseudo_utf8_len(n):
    """ Find the smallest key of encoding_types whose entry admits n.

    Keys are tried in ascending order; a key matches when n is less
    than or equal to the first element of its entry.

    :param n: Number to look up.
    :return: The first matching key, or None if no entry matches.
    """
    candidates = (
        k for k in sorted(encoding_types)
        if n <= encoding_types[k][0]
    )
    return next(candidates, None)
class InvalidNumberException(Exception):
pass
class InvalidLengthException(Exception):
@ -304,10 +309,7 @@ def encode_pseudo_utf8(n, high_bits, to):
if n < 0:
raise InvalidNumberException(n)
if to is None or to < 0:
for k in sorted(encoding_types):
if n <= encoding_types[k][0]:
to = k
break
to = pseudo_utf8_len(n)
if to is None:
raise InvalidNumberException(n)
if to > 8 or to < 0:
@ -343,27 +345,26 @@ class Line:
raise RangeCheckException(a.at,
a.val,
reglen)
def load_label(self, labels):
    """ Ask every label argument of this line to load its label.

    :param labels: Passed through to load_label of each argument
      whose type is ArgType.LAB.
    """
    for arg in (a for a in self.args if a.at == ArgType.LAB):
        arg.load_label(labels)
def __call__(self):
return self.ins.render(self.args)
class InstructionNotFoundException(Exception):
pass
class Program:
def _asm_push_line(self, ins, args):
    """ Build a Line from an instruction and its arguments, check it,
    and append it to the program.

    :param ins: Instruction of the line.
    :param args: Arguments of the instruction.
    """
    parsed = Line(ins, args)
    # Check before appending so a rejected line never reaches self.asm.
    parsed.check_line(self.lablen, self.reglen)
    self.asm.append(parsed)

# Former public name, kept for callers that still use it.
asm_push_line = _asm_push_line
def parse_asm_line(self, line):
""" Parse and add a single assembly line to the program.
:param line: String containing the line.
:raises InstructionNotFoundException:
"""
line = line.strip().split()
line[0] = line[0].casefold()
if line[0][0] == '.':
self.asm.append(line[0][1:])
self.asm.append(line[0])
return None
try:
@ -372,21 +373,72 @@ class Program:
raise InstructionNotFoundException(line[0])
args_w_type = ins.typecheck(line[1:])
self.asm_push_line(ins, args_w_type)
self._asm_push_line(ins, args_w_type)
def parse_lines(self, lines):
    """ Parse several assembly lines in order.

    Each element is handed to parse_asm_line; see that method for
    the accepted syntax.

    :param lines: List of assembly lines.
    """
    for text in lines:
        self.parse_asm_line(text)
def __call__(self):
    """ Generate bytecode.

    :return: The assembled program as bytes.
    :raises InvalidLengthException: If no width in encoding_types
      is large enough for the whole program.
    :raises KeyError: If an instruction refers to a label that was
      never defined.
    """

    # Labels may jump forward in the program, which means
    # multiple passes are required to properly calculate
    # jump locations.

    # This algorithm makes every jump destination the same
    # width in each operation, and calculates the smallest
    # width that will allow all labels to jump to any location
    # in the program.

    # The algorithm calculates the length of the program
    # with all jump arguments given a length of 0. Each label
    # is noted with its offset in the program (with all jump
    # arguments given zero length) and the number of jump arguments
    # that occur prior to the label.

    # When the code is emitted, each label's real offset is
    # calculated from the chosen jump argument width.

    # This method is not optimal, but will work well for small
    # programs.

    ins = []
    curlen = 0
    # This dictionary maps each label to a tuple (len, refs),
    # meaning the label points to len + lablen*refs
    # where lablen is a to-be-determined number.
    labels = {}
    labelrefs = 0

    # First pass: render every instruction and measure the program
    # with all jump arguments counted as zero bytes long.
    for line in self.asm:
        if isinstance(line, str):
            labels[line] = (curlen, labelrefs)
            continue

        next_ins = line()
        for v in next_ins:
            if isinstance(v, str):
                labelrefs += 1
            else:
                curlen += len(v)
        ins.append(next_ins)

    # Calculate a label length, such that the entire program
    # can be contained in this length. Widths are tried in
    # ascending order so the smallest one that fits is chosen.
    lablen = None
    for width in sorted(encoding_types):
        if curlen + labelrefs*width < encoding_types[width][0]:
            lablen = width
            break
    if lablen is None:
        # Fail explicitly instead of hitting an unbound name below.
        raise InvalidLengthException(curlen)

    # Second pass: emit bytecode, replacing each label reference
    # with its encoded offset.
    out = []
    for line in ins:
        for arg in line:
            if isinstance(arg, str):
                base, refs = labels[arg]
                arg = encode_pseudo_utf8(base + refs*lablen, 0, lablen)
            out.append(arg)
    b = b''.join(out)

    assert len(b) < encoding_types[lablen][0]
    return b
def __init__(self, lablen=16, reglen=16):

View File

@ -30,17 +30,17 @@ class PushTest(unittest.TestCase):
def test_parse_push_catch_typecheck_push_lab(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
p.parse_asm_line("push l0")
p.parse_asm_line("push .l0")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, 'l0')
self.assertEqual(cm.exception.sarg, '.l0')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 1)
self.assertEqual(cm.exception.opcode, Instruction.PUSH)
def test_parse_push_catch_typecheck_argument_overflow(self):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("push r1 r2")
self.assertEqual(cm.exception.opcode, 1)
self.assertEqual(cm.exception.opcode, Instruction.PUSH)
self.assertEqual(cm.exception.insargs, ["r1", "r2"])
self.assertEqual(cm.exception.argtypelen, 1)
@ -48,7 +48,7 @@ class PushTest(unittest.TestCase):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("push")
self.assertEqual(cm.exception.opcode, 1)
self.assertEqual(cm.exception.opcode, Instruction.PUSH)
self.assertEqual(cm.exception.insargs, [])
self.assertEqual(cm.exception.argtypelen, 1)
@ -64,23 +64,6 @@ class PushTest(unittest.TestCase):
p.parse_asm_line("PUSH 6")
ex = ffi.Environment(p())
self.assertEqual(ex.cenv.prgend, 2)
self.assertEqual(ex.cenv.prg[0].opcode, 1)
self.assertEqual(ex.cenv.prg[0].w_flags[0], 1)
self.assertEqual(ex.cenv.prg[0].w_flags[1], 0)
self.assertEqual(ex.cenv.prg[0].w_flags[2], 0)
self.assertEqual(ex.cenv.prg[0].w[0], 0)
self.assertEqual(ex.cenv.prg[0].w[1], 0)
self.assertEqual(ex.cenv.prg[0].w[2], 0)
self.assertEqual(ex.cenv.prg[1].opcode, 1)
self.assertEqual(ex.cenv.prg[1].w_flags[0], 0)
self.assertEqual(ex.cenv.prg[1].w_flags[1], 0)
self.assertEqual(ex.cenv.prg[1].w_flags[2], 0)
self.assertEqual(ex.cenv.prg[1].w[0], 6)
self.assertEqual(ex.cenv.prg[1].w[1], 0)
self.assertEqual(ex.cenv.prg[1].w[2], 0)
def test_push_many(self):
p = Program()
@ -107,15 +90,6 @@ class PopTest(unittest.TestCase):
b = p()
self.assertEqual(b, b'\x02\xC2\x89\x00')
ex = ffi.Environment(b)
self.assertEqual(ex.cenv.prgend, 1)
self.assertEqual(ex.cenv.prg[0].opcode, 2)
self.assertEqual(ex.cenv.prg[0].w_flags[0], 1)
self.assertEqual(ex.cenv.prg[0].w_flags[1], 0)
self.assertEqual(ex.cenv.prg[0].w_flags[2], 0)
self.assertEqual(ex.cenv.prg[0].w[0], 9)
self.assertEqual(ex.cenv.prg[0].w[1], 0)
self.assertEqual(ex.cenv.prg[0].w[2], 0)
def test_compile_throw_pop_literal(self):
p = Program()
@ -124,22 +98,22 @@ class PopTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '6')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 2)
self.assertEqual(cm.exception.opcode, Instruction.POP)
def test_compile_throw_pop_label(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
p.parse_asm_line("pop l9")
p.parse_asm_line("pop .l9")
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, 'l9')
self.assertEqual(cm.exception.sarg, '.l9')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 2)
self.assertEqual(cm.exception.opcode, Instruction.POP)
def test_compile_throw_argument_overflow(self):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("pop r1 r2")
self.assertEqual(cm.exception.opcode, 2)
self.assertEqual(cm.exception.opcode, Instruction.POP)
self.assertEqual(cm.exception.insargs, ["r1", "r2"])
self.assertEqual(cm.exception.argtypelen, 1)
@ -147,7 +121,7 @@ class PopTest(unittest.TestCase):
p = Program()
with self.assertRaises(TypecheckLenException) as cm:
p.parse_asm_line("pop")
self.assertEqual(cm.exception.opcode, 2)
self.assertEqual(cm.exception.opcode, Instruction.POP)
self.assertEqual(cm.exception.insargs, [])
def test_pop_underflow(self):
@ -189,7 +163,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '5')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 3)
self.assertEqual(cm.exception.opcode, Instruction.ADD)
def test_exec_add_throw_lab_1(self):
p = Program()
@ -198,7 +172,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.label')
self.assertEqual(cm.exception.i, 1)
self.assertEqual(cm.exception.opcode, 3)
self.assertEqual(cm.exception.opcode, Instruction.ADD)
def test_exec_add_throw_lab_2(self):
p = Program()
@ -207,7 +181,7 @@ class AddTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.ab')
self.assertEqual(cm.exception.i, 2)
self.assertEqual(cm.exception.opcode, 3)
self.assertEqual(cm.exception.opcode, Instruction.ADD)
class MulTest(unittest.TestCase):
def test_exec_mul_imm_imm(self):
@ -233,25 +207,25 @@ class MulTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '942')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 4)
self.assertEqual(cm.exception.opcode, Instruction.MUL)
def test_exec_mul_throw_lab_1(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
p.parse_asm_line("mul r9 l2 1991")
p.parse_asm_line("mul r9 .l2 1991")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, 'l2')
self.assertEqual(cm.exception.sarg, '.l2')
self.assertEqual(cm.exception.i, 1)
self.assertEqual(cm.exception.opcode, 4)
self.assertEqual(cm.exception.opcode, Instruction.MUL)
def test_exec_mul_throw_lab_2(self):
p = Program()
with self.assertRaises(TypecheckException) as cm:
p.parse_asm_line("mul r0 -11 l48")
p.parse_asm_line("mul r0 -11 .l48")
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, 'l48')
self.assertEqual(cm.exception.sarg, '.l48')
self.assertEqual(cm.exception.i, 2)
self.assertEqual(cm.exception.opcode, 4)
self.assertEqual(cm.exception.opcode, Instruction.MUL)
class DivTest(unittest.TestCase):
def test_div(self):
@ -295,7 +269,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.REG)
self.assertEqual(cm.exception.sarg, '5')
self.assertEqual(cm.exception.i, 0)
self.assertEqual(cm.exception.opcode, 5)
self.assertEqual(cm.exception.opcode, Instruction.DIV)
def test_exec_div_throw_lab_1(self):
p = Program()
@ -304,7 +278,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.qqweq')
self.assertEqual(cm.exception.i, 1)
self.assertEqual(cm.exception.opcode, 5)
self.assertEqual(cm.exception.opcode, Instruction.DIV)
def test_exec_div_throw_lab_2(self):
p = Program()
@ -313,7 +287,7 @@ class DivTest(unittest.TestCase):
self.assertEqual(cm.exception.argtype, ArgType.VAL)
self.assertEqual(cm.exception.sarg, '.24')
self.assertEqual(cm.exception.i, 2)
self.assertEqual(cm.exception.opcode, 5)
self.assertEqual(cm.exception.opcode, Instruction.DIV)
class LabelTest(unittest.TestCase):
def test_unconditional_jump(self):
@ -321,9 +295,9 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 5",
"mov r0 6",
"j l0",
"j .l0",
"mov r0 7",
"CLB l0",
".l0",
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@ -334,10 +308,10 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"add r0 10 0",
"add r1 20 0",
"CLB l0",
".loop_head",
"add r0 r0 -1",
"add r1 r1 1",
"jl l0 0 r0"
"jl .loop_head 0 r0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@ -349,10 +323,10 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 30",
"mov r1 0",
"CLB l0",
".l0",
"add r0 r0 -1",
"add r1 r1 1",
"jls l0 -30 r0"
"jls .l0 -30 r0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@ -364,11 +338,11 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 50",
"mov r1 0",
"CLB l0",
".l0",
"add r1 r1 1",
"mul r2 r0 -1",
"add r2 r2 r1",
"jne l0 r2 0"
"jne .l0 r2 0"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)
@ -381,14 +355,14 @@ class LabelTest(unittest.TestCase):
p.parse_lines([
"mov r0 0", # outer loop counter
"mov r2 0", # total iteration counter
"CLB l0",
".outer_loop",
"mov r1 0", # inner loop counter
"CLB l1",
".inner_loop",
"add r1 r1 1",
"add r2 r2 1",
"jl l1 r1 50",
"jl .inner_loop r1 50",
"add r0 r0 1",
"jl l0 r0 50"
"jl .outer_loop r0 50"
])
ex = ffi.Environment(p())
self.assertEqual(ex(), ffi.RunRet.STOP)